%%% TeX preamble: hyphenation exceptions followed by guarded fallback
%%% definitions of helper macros.  Every definition sits behind an
%%% \ifx \undefined test, so a definition already supplied by the citing
%%% document always wins.  The \booktitle fallback is given once only (a
%%% second guarded copy can never take effect), and \emdash gets a
%%% fallback because the j-SPE journal string uses it.
@Preamble{
    "\hyphenation{
        Cor-vi
        Dough-er-ty
        Jo-seph
        Nov-em-ber
    }" #
    "\ifx \undefined \booktitle \def \booktitle#1{{{\em #1}}} \fi" #
    "\ifx \undefined \circled \def \circled #1{(#1)}\fi" #
    "\ifx \undefined \emdash \def \emdash {---}\fi" #
    "\ifx \undefined \reg \def \reg {\circled{R}}\fi" #
    "\ifx \undefined \TM \def \TM {${}^{\sc TM}$} \fi"
}
%%% Contact details referenced by the acknowledgement field of entries.
@string{ack-nhfb = {Nelson H. F. Beebe,
    University of Utah,
    Department of Mathematics, 110 LCB,
    155 S 1400 E RM 233,
    Salt Lake City, UT 84112-0090, USA,
    Tel: +1 801 581 5254,
    FAX: +1 801 581 4148,
    e-mail: \path|beebe@math.utah.edu|,
    \path|beebe@acm.org|,
    \path|beebe@computer.org| (Internet),
    URL: \path|http://www.math.utah.edu/~beebe/|}}
%%% Institution abbreviations; each ":adr" string is the matching address.
@string{inst-ANL-mcs = {Mathematics and Computer Science Division,
    Argonne National Laboratory}}
@string{inst-ANL:adr = {9700 South Cass Avenue, Argonne, IL 60439-4801, USA}}
@string{inst-CERFACS = {CERFACS}}
@string{inst-CERFACS:adr = {Toulouse, France}}
@string{inst-EMORY = {Emory University}}
@string{inst-EMORY:adr = {Atlanta, GA, USA}}
@string{inst-IBM-WATSON = {IBM T. J. Watson Research Center}}
@string{inst-IBM-WATSON:adr = {Yorktown Heights, NY, USA}}
@string{inst-MSU = {Mississippi State University}}
@string{inst-MSU:adr = {Starkville, MS, USA}}
@string{inst-NLRC = {NASA Langley Research Center}}
@string{inst-NLRC:adr = {Hampton, VA, USA}}
%%% Oak Ridge National Laboratory and its address.  The address string is
%%% used as the address field of the ORNL reports and manual in this file;
%%% the laboratory is located in Oak Ridge, TN (not Knoxville).
@String{inst-ORNL = "Oak Ridge National Laboratory"}
@String{inst-ORNL:adr = "Oak Ridge, TN, USA"}
%%% Institution abbreviations (continued); ":adr" strings hold addresses.
@string{inst-SCS-CMU = {School of Computer Science, Carnegie Mellon University}}
@string{inst-SCS-CMU:adr = {Pittsburgh, PA, USA}}
@string{inst-UAL-EE = {Department of Electrical Engineering,
    University of Alabama}}
@string{inst-UAL-EE:adr = {Tuscaloosa, AL, USA}}
@string{inst-UGA = {University of Georgia}}
@string{inst-UGA:adr = {Athens, GA, USA}}
@string{inst-UTK = {University of Tennessee, Knoxville}}
@string{inst-UTK:adr = {Knoxville, TN 37996, USA}}
@string{inst-UTK-CS = {Department of Computer Science,
    University of Tennessee, Knoxville}}
@string{inst-UTK-CS:adr = {Knoxville, TN 37996, USA}}
%%% Journal-name abbreviations ("j-" prefix), ACM Communications through CPE.
@string{j-ACM-COMM-COMP-ALGEBRA = {ACM Communications in Computer Algebra}}
@string{j-ACM-J-EXP-ALGORITHMICS = {ACM Journal of Experimental Algorithmics}}
@string{j-ADV-COMPUT-MATH = {Advances in computational mathematics}}
@string{j-ADV-WATER-RESOURCES = {Advances in water resources}}
@string{j-AIAA-ASME-ASCE-AHS-STRUCT-STRUCT-DYN-MAT-CONF =
    {AIAA/ASME/ASCE/AHS Structures, Structural Dynamics \& Materials
    Conference --- Collection of Technical Papers}}
@string{j-ALGORITHMICA = {Algorithmica}}
@string{j-ALGORITHMS-BASEL = {Algorithms ({Basel})}}
@string{j-APPL-MATH-COMP = {Applied Mathematics and Computation}}
@string{j-APPL-NUM-MATH = {Applied Numerical Mathematics:
    Transactions of IMACS}}
@string{j-AUSTRALIAN-COMP-SCI-COMM = {Australian Computer Science
    Communications}}
@string{j-BIOMETRICS = {Biometrics}}
@string{j-CACM = {Communications of the ACM}}
@string{j-CCPE = {Concurrency and Computation: Prac\-tice and Experience}}
@string{j-CGF = {Com{\-}pu{\-}ter Graphics Forum}}
@string{j-CHIN-J-COMPUTERS = {Chinese Journal of Computers}}
@string{j-COMP-ARCH-NEWS = {ACM SIGARCH Computer Architecture News}}
@string{j-COMP-ART-INTELL = {Computers and Artificial Intelligence =
    Vychislitel'nye mashiny i iskusstvennyi intellekt}}
@string{j-COMP-CHEM-ENG = {Computers \& Chemical Engineering}}
@string{j-COMP-ECONOMICS = {Computational Economics}}
@string{j-COMP-J = {The Computer Journal}}
@string{j-COMP-LANGS-SYS-STRUCT = {Computer Languages, Systems and Structures}}
@string{j-COMP-MECH = {Computational mechanics}}
@string{j-COMP-NET-AMSTERDAM =
    {Computer Networks (Amsterdam, Netherlands: 1999)}}
@string{j-COMP-PHYS-COMM = {Computer Physics Communications}}
@string{j-COMP-STAT = {Computational Statistics}}
@string{j-COMP-SURV = {ACM Computing Surveys}}
@string{j-COMP-SYS = {Computing systems: the journal of the
    USENIX Association}}
@string{j-COMPUT-MATH-APPL = {Computers and Mathematics with Applications}}
@string{j-COMPUT-METH-APPL-MECH-ENG = {Computer Methods in Applied Mechanics
    and Engineering}}
@string{j-COMPUT-PHYS = {Computers in Physics}}
@string{j-COMPUT-SCI-ENG = {Computing in Science and Engineering}}
@string{j-COMPUT-SYST-ENG = {Computing systems in engineering: an
    international journal}}
@string{j-COMPUTER = {Computer}}
@string{j-COMPUTERS-AND-GRAPHICS = {Computers and Graphics}}
@string{j-COMPUTING = {Computing}}
@string{j-CPE = {Concurrency: practice and experience}}
%%% Journal-name abbreviations ("j-" prefix), CRAY Channels through IT + TI.
@string{j-CRAY-CHANNELS = {CRAY Channels}}
@string{j-DEC-TECH-J = {Digital Technical Journal of Digital
    Equipment Corporation}}
@string{j-DISCRETE-APPL-MATH = {Discrete Applied Mathematics}}
@string{j-ELECT-LETTERS = {Electronics Letters}}
@string{j-ENG-SCI-REP-KYUSHU = {Engineering Sciences Reports,
    Kyushu University}}
@string{j-FORTRAN-FORUM = {ACM Fortran Forum}}
@string{j-FRONTIERS-MASS-PAR-COMP-CONF-PROC = {Frontiers of Massively
    Parallel Computation --- Conference Proceedings}}
@string{j-FUT-GEN-COMP-SYS = {Future Generation Computer Systems}}
@string{j-FUTURE-INTERNET = {Future Internet}}
@string{j-HIGH-TECH-LETT = {High Technology Letters}}
@string{j-HUMAN-HEREDITY = {Human heredity}}
@string{j-IBM-JRD = {IBM Journal of Research and Development}}
@string{j-IBM-SYS-J = {IBM Systems Journal}}
@string{j-IEEE-COMPUT-ARCHIT-LETT = {IEEE Computer Architecture Letters}}
@string{j-IEEE-COMPUT-SCI-ENG = {IEEE Computational Science \& Engineering}}
@string{j-IEEE-CONCURR = {IEEE Concurrency}}
@string{j-IEEE-DISTRIB-SYST-ONLINE = {IEEE Distributed Systems Online}}
@string{j-IEEE-INT-CONF-ALG-ARCH-PAR-PROC = {IEEE International Conference
    on Algorithms and Architectures for Parallel Processing}}
@string{j-IEEE-J-SEL-AREAS-COMMUN = {IEEE Journal on Selected Areas in
    Communications}}
@string{j-IEEE-MICRO = {IEEE Micro}}
@string{j-IEEE-MICROW-GUIDED-WAVE-LETT = {IEEE Microwave and Guided Wave
    Letters}}
@string{j-IEEE-PAR-DIST-TECH = {IEEE parallel and distributed technology:
    systems and applications}}
@string{j-IEEE-TRANS-COMPUT = {IEEE Transactions on Computers}}
@string{j-IEEE-TRANS-PAR-DIST-SYS = {IEEE Transactions on Parallel and
    Distributed Systems}}
@string{j-IEEE-TRANS-SOFTW-ENG = {IEEE Transactions on Software Engineering}}
@string{j-IEEE-TRANS-VIS-COMPUT-GRAPH = {IEEE Transactions on Visualization
    and Computer Graphics}}
@string{j-IFIP-TRANS-A = {IFIP Transactions. A. Computer Science and
    Technology}}
@string{j-IJHPCA = {The International Journal of High Performance
    Computing Applications}}
@string{j-IJQC = {International Journal of Quantum Chemistry}}
@string{j-IJSA = {The International Journal of Supercomputer Applications}}
@string{j-IJSAHPC = {International Journal of Supercomputer
    Applications and High Performance Computing}}
@string{j-INFO-SOFTWARE-TECH = {Information and Software Technology}}
@string{j-INFORMATICA = {Informatica (Ljubljana, Slovenia)}}
@string{j-INT-J-COMPUT-APPL = {International Journal of Computer
    Applications}}
@string{j-INT-J-COMPUT-SYST-SCI-ENG = {International Journal of Computer
    Systems Science and Engineering}}
@string{j-INT-J-HIGH-SPEED-COMPUTING = {International Journal of High Speed
    Computing}}
@string{j-INT-J-IMAGE-GRAPHICS = {International Journal of Image and
    Graphics (IJIG)}}
@string{j-INT-J-NUMER-METHODS-FLUIDS = {International Journal for Numerical
    Methods in Fluids}}
@string{j-INT-J-PAR-EMER-DIST-SYS = {International Journal of Parallel,
    Emergent and Distributed Systems: IJPEDS}}
@string{j-INT-J-PARALLEL-PROG = {International Journal of Parallel
    Programming}}
@string{j-INTEL-TECH-J = {Intel Technology Journal}}
@string{j-IT-IT = {Informationstechnik und technische Informatik: IT + TI}}
%%% Journal-name abbreviations ("j-" prefix), Journal of Applied
%%% Econometrics through Scalable Computing.
@string{j-J-APPL-ECONOMETRICS = {Journal of Applied Econometrics}}
@string{j-J-APPL-PHYS = {Journal of Applied Physics}}
@string{j-J-COMP-SCI-TECH = {Journal of computer science and technology}}
@string{j-J-COMP-SYS-SCI = {Journal of Computer and System Sciences}}
@string{j-J-COMPUT-APPL-MATH = {Journal of Computational and Applied
    Mathematics}}
@string{j-J-COMPUT-BIOL = {Journal of Computational Biology}}
@string{j-J-COMPUT-CHEM = {Journal of Computational Chemistry}}
@string{j-J-COMPUT-PHYS = {Journal of Computational Physics}}
@string{j-J-GRID-COMP = {Journal of Grid Computing}}
@string{j-J-MOL-STRUCT-THEOCHEM = {Journal of molecular structure. Theochem}}
@string{j-J-OPEN-RES-SOFT = {Journal of Open Research Software}}
@string{j-J-PAR-DIST-COMP = {Journal of Parallel and Distributed Computing}}
@string{j-J-PHYS-IV-COLLOQUE = {Journal de physique. IV, Colloque}}
@string{j-J-PROGRAM-LANG = {Journal of Programming Languages}}
@string{j-J-SCI-COMPUT = {Journal of Scientific Computing}}
@string{j-J-STAT-SOFT = {Journal of Statistical Software}}
@string{j-J-SUPERCOMPUTING = {The Journal of Supercomputing}}
@string{j-J-SYST-SOFTW = {The Journal of Systems and Software}}
@string{j-J-UCS = {J.UCS: Journal of Universal Computer Science}}
@string{j-JETC = {ACM Journal on Emerging Technologies
    in Computing Systems (JETC)}}
@string{j-JOHO-SHORI = {Joho-Shori (J. Information Processing Soc. Japan)}}
@string{j-LECT-NOTES-COMP-SCI = {Lecture Notes in Computer Science}}
@string{j-LINUX-J = {Linux Journal}}
@string{j-MICROCOMP-CIVIL-ENG = {Microcomputers in Civil Engineering}}
@string{j-MICROPROC-MICROPROG = {Microprocessing and Microprogramming}}
@string{j-MINI-MICRO-SYSTEMS = {Mini-Micro Systems}}
@string{j-NETWORK-SECURITY = {Network Security}}
@string{j-NEURAL-PAR-SCI-COMPUT = {Neural, Parallel and Scientific
    Computations}}
@string{j-NUCL-SCI-ENG = {Nuclear Science and Engineering}}
@string{j-NUCLEAR-SAFETY = {Nuclear safety}}
@string{j-NUMER-ALGORITHMS = {Numerical Algorithms}}
@string{j-OPER-SYS-REV = {Operating Systems Review}}
@string{j-PACMPL = {Proceedings of the ACM on Programming
    Languages (PACMPL)}}
@string{j-PARALLEL-ALGORITHMS-APPL = {Parallel Algorithms and Applications}}
@string{j-PARALLEL-COMPUTING = {Parallel Computing}}
@string{j-PARALLEL-DIST-COMP-PRACT = {Parallel and Distributed Computing
    Practices}}
@string{j-PARALLEL-PROCESS-LETT = {Parallel Processing Letters}}
@string{j-PARALLELOGRAM = {Parallelogram}}
@string{j-POMACS = {Proceedings of the ACM on Measurement and
    Analysis of Computing Systems (POMACS)}}
@string{j-PROC-INT-CONF-PAR-PROC = {Proceedings of the International
    Conference on Parallel Processing}}
@string{j-PROC-SPIE = {Proceedings of the SPIE --- The
    International Society for Optical Engineering}}
@string{j-PROC-SUPERCOMPUT = {Proceedings of the Supercomputing Conference}}
@string{j-PROC-VLDB-ENDOWMENT = {Proceedings of the VLDB Endowment}}
@string{j-PROGRAMMIROVANIE = {Programmirovanie}}
@string{j-QUEUE = {ACM Queue: Tomorrow's Computing Today}}
@string{j-R-JOURNAL = {The R Journal}}
@string{j-R-NEWS = {R News: the Newsletter of the R Project}}
@string{j-REAL-TIME-IMAGING = {Real-Time Imaging}}
@string{j-SCI-COMPUT-PROGRAM = {Science of Computer Programming}}
@string{j-SCI-PROG = {Scientific Programming}}
@string{j-SCPE = {Scalable Computing: Practice and Experience}}
%%% Journal-name abbreviations ("j-" prefix), SIAM Journal on Optimization
%%% through VLDB Journal.
@string{j-SIAM-J-OPT = {SIAM Journal on Optimization}}
@string{j-SIAM-J-SCI-COMP = {SIAM Journal on Scientific Computing}}
@string{j-SIAM-NEWS = {SIAM News}}
@string{j-SIGADA-LETTERS = {ACM SIGADA Ada Letters}}
@string{j-SIGCSE = {SIGCSE Bulletin (ACM Special Interest Group
    on Computer Science Education)}}
@string{j-SIGMETRICS = {ACM SIGMETRICS Performance Evaluation Review}}
@string{j-SIGMOD = {SIGMOD Record (ACM Special Interest
    Group on Management of Data)}}
@string{j-SIGNAL-PROCESS-IMAGE-COMMUN = {Signal Processing: Image
    Communication}}
@string{j-SIGPLAN = {ACM SIG{\-}PLAN Notices}}
@string{j-SIGSAM = {SIGSAM Bulletin (ACM Special Interest Group
    on Symbolic and Algebraic Manipulation)}}
@string{j-SIGSOFT = {ACM SIGSOFT Software Engineering Notes}}
@string{j-SIM-MODEL-PRACT-THEORY = {Simulation Modelling Practice and Theory}}
@string{j-SOFTWAREX = {SoftwareX}}
@string{j-SPE = {Soft{\-}ware\emdash Prac{\-}tice and Experience}}
@string{j-STAT-COMPUT = {Statistics and Computing}}
@string{j-SUPERCOMPUTER = {Supercomputer}}
@string{j-SUPERFRI = {Supercomputing Frontiers and Innovations}}
@string{j-TACO = {ACM Transactions on Architecture and Code Optimization}}
@string{j-TCBB = {IEEE/ACM Transactions on Computational
    Biology and Bioinformatics}}
@string{j-TECS = {ACM Transactions on Embedded Computing Systems}}
@string{j-TKDD = {ACM Transactions on Knowledge
    Discovery from Data (TKDD)}}
@string{j-TOCE = {ACM Transactions on Computing Education}}
@string{j-TOCL = {ACM Transactions on Computational Logic}}
@string{j-TOCS = {ACM Transactions on Computer Systems}}
@string{j-TODAES = {ACM Transactions on Design Automation of
    Electronic Systems}}
@string{j-TOG = {ACM Transactions on Graphics}}
@string{j-TOMACS = {ACM Transactions on Modeling and Computer Simulation}}
@string{j-TOMCCAP = {ACM Transactions on Multimedia Computing,
    Communications, and Applications}}
@string{j-TOMPECS = {ACM Transactions on Modeling and Performance
    Evaluation of Computing Systems (TOMPECS)}}
@string{j-TOMS = {ACM Transactions on Mathematical Software}}
@string{j-TOPC = {ACM Transactions on Parallel Computing (TOPC)}}
@string{j-TOPLAS = {ACM Transactions on Programming Languages
    and Systems}}
@string{j-TOSEM = {ACM Transactions on Software Engineering
    and Methodology}}
@string{j-TRANS-AM-NUCL-SOC = {Transactions of the American Nuclear Society}}
@string{j-TRANS-INFO-PROCESSING-SOC-JAPAN = {Transactions of the Information
    Processing Society of Japan}}
@string{j-TRETS = {ACM Transactions on Reconfigurable Technology
    and Systems (TRETS)}}
@string{j-TSAS = {ACM Transactions on Spatial Algorithms and
    Systems (TSAS)}}
@string{j-VLDB-J = {VLDB Journal: Very Large Data Bases}}
%%% Publisher abbreviations ("pub-" prefix); ":adr" strings hold addresses.
@string{pub-ACM = {ACM Press}}
@string{pub-ACM:adr = {New York, NY 10036, USA}}
@string{pub-AIP = {American Institute of Physics}}
@string{pub-AIP:adr = {Woodbury, NY, USA}}
@string{pub-ASME = {American Society Mech. Engineers}}
@string{pub-ASME:adr = {United Engineering Center, 345 E. 47th St.,
    New York, NY 10017, USA}}
@string{pub-AW = {Ad{\-d}i{\-s}on-Wes{\-l}ey}}
@string{pub-AW:adr = {Reading, MA, USA}}
@string{pub-BIRKHAUSER = {Birkh{\"a}user}}
@string{pub-BIRKHAUSER:adr = {Cambridge, MA, USA; Berlin, Germany;
    Basel, Switzerland}}
@string{pub-CAMBRIDGE = {Cambridge University Press}}
@string{pub-CAMBRIDGE:adr = {Cambridge, UK}}
@string{pub-CHAPMAN-HALL = {Chapman and Hall, Ltd.}}
@string{pub-CHAPMAN-HALL:adr = {London, UK}}
@string{pub-CHAPMAN-HALL-CRC = {Chapman and Hall/CRC}}
@string{pub-CHAPMAN-HALL-CRC:adr = {Boca Raton, FL, USA}}
@string{pub-CRC = {CRC Press}}
@string{pub-CRC:adr = {2000 N.W. Corporate Blvd., Boca Raton,
    FL 33431-9868, USA}}
@string{pub-ELS = {Elsevier}}
@string{pub-ELS:adr = {Amsterdam, The Netherlands}}
@string{pub-ELSAS = {Elsevier Applied Science}}
@string{pub-ELSAS:adr = {London, UK}}
@string{pub-IEEE = {IEEE Computer Society Press}}
@string{pub-IEEE:adr = {1109 Spring Street, Suite 300, Silver Spring,
    MD 20910, USA}}
%%% IOS Press and its postal address.  The ZIP+4 code needs a five-digit
%%% base; the four-digit "2209" was a dropped digit and is restored to
%%% "22009".
@String{pub-IOS = "IOS Press"}
@String{pub-IOS:adr = "Postal Drawer 10558, Burke, VA
22009-0558, USA"}
%%% Publisher abbreviations (continued); ":adr" strings hold addresses.
@string{pub-KLUWER = {Kluwer Academic Publishers Group}}
@string{pub-KLUWER:adr = {Norwell, MA, USA, and Dordrecht, The Netherlands}}
@string{pub-MCGRAW-HILL = {Mc{\-}Graw-Hill}}
@string{pub-MCGRAW-HILL:adr = {New York, NY, USA}}
@string{pub-MIT = {MIT Press}}
@string{pub-MIT:adr = {Cambridge, MA, USA}}
@string{pub-MORGAN-KAUFMANN = {Morgan Kaufmann Publishers}}
@string{pub-MORGAN-KAUFMANN:adr = {Los Altos, CA 94022, USA}}
@string{pub-MORGAN-KAUFMANN:adrnew = {2929 Campus Drive, Suite 260,
    San Mateo, CA 94403, USA}}
@string{pub-NASA = {National Aeronautics and Space Administration}}
@string{pub-NASA:adr = {Washington, DC, USA}}
@string{pub-NH = {North-Hol{\-}land}}
@string{pub-NH:adr = {Amsterdam, The Netherlands}}
@string{pub-NTIS = {National Technical Information Service}}
@string{pub-NTIS:adr = {Washington, DC, USA}}
@string{pub-ORA = {O'Reilly \& {Associates, Inc.}}}
@string{pub-ORA:adr = {981 Chestnut Street, Newton, MA 02164, USA}}
@string{pub-OXFORD = {Oxford University Press}}
@string{pub-OXFORD:adr = {Walton Street, Oxford OX2 6DP, UK}}
@string{pub-PHI = {Pren{\-}tice-Hall International}}
@string{pub-PHI:adr = {Englewood Cliffs, NJ 07632, USA}}
@string{pub-PLENUM = {Plenum Press}}
@string{pub-PLENUM:adr = {New York, NY, USA}}
@string{pub-SCRI = {Supercomputing Computations Research Institute,
    Florida State University}}
@string{pub-SCRI:adr = {Tallahassee, FL, USA}}
@string{pub-SIAM = {Society for Industrial and Applied Mathematics}}
@string{pub-SIAM:adr = {Philadelphia, PA, USA}}
@string{pub-SPE = {Society of Petroleum Engineers}}
@string{pub-SPE:adr = {Richardson, TX, USA}}
@string{pub-SPIE = {Society of Photo-optical
    Instrumentation Engineers (SPIE)}}
@string{pub-SPIE:adr = {Bellingham, WA, USA}}
@string{pub-SUN-MICROSYSTEMS-PRESS = {Sun Microsystems Press}}
@string{pub-SUN-MICROSYSTEMS-PRESS:adr = {Palo Alto, CA, USA}}
@string{pub-SV = {Spring{\-}er-Ver{\-}lag}}
@string{pub-SV:adr = {Berlin, Germany~/ Heidelberg,
    Germany~/ London, UK~/ etc.}}
@string{pub-USENIX = {USENIX}}
@string{pub-USENIX:adr = {Berkeley, CA, USA}}
@string{pub-WILEY = {Wiley}}
@string{pub-WILEY:adr = {New York, NY, USA}}
@string{pub-WORLD-SCI = {World Scientific Publishing Co. Pte. Ltd.}}
@string{pub-WORLD-SCI:adr = {P. O. Box 128, Farrer Road, Singapore 9128}}
%%% Series-name abbreviations ("ser-" prefix).
@string{ser-LNAI = {Lecture Notes in Artificial Intelligence}}
@string{ser-LNCS = {Lecture Notes in Computer Science}}
@string{ser-LNCSE = {Lecture Notes in Computational Science and Engineering}}
%%% Journal article, Operating Systems Review 23(5), 1989.
@article{Abrossimov:1989:GVM,
  author          = {V. Abrossimov and M. Rozier and M. Shapiro},
  title           = {Generic virtual memory management for operating system
                     kernels},
  journal         = j-OPER-SYS-REV,
  volume          = {23},
  number          = {5},
  pages           = {123--136},
  year            = {1989},
  CODEN           = {OSRED8},
  ISSN            = {0163-5980 (print), 1943-586X (electronic)},
  ISSN-L          = {0163-5980},
  bibdate         = {Sun Dec 22 10:16:35 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Chorus Systemes, Saint-Quentin-en-Yvelines, France},
  classification  = {C6120 (File organisation); C6150J (Operating
                     systems)},
  fjournal        = {Operating Systems Review},
  keywords        = {Chorus Nucleus; Consistent cache; Data caching;
                     Deferred copying; Explicit I/O; Generic Memory
                     management Interface; History object technique; Mapped
                     objects; Operating system kernel; Paged architectures;
                     Paged Virtual Memory manager; PVM; Real memory; Unix},
  thesaurus       = {Buffer storage; Operating systems [computers]; Virtual
                     storage},
}
%%% Conference paper; proceedings data come from the cross-referenced entry.
@inproceedings{Poplawski:1989:MPP,
  author          = {D. A. Poplawski and S. Pahwa and J. M. Francioni},
  title           = {Models of parallel program behavior},
  crossref        = {Anonymous:1989:PFC},
  pages           = {857--860 (vol. 2)},
  year            = {1989},
  bibdate         = {Sun Dec 22 10:16:53 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Comput. Sci., Michigan Technol. Univ.,
                     Houghton, MI, USA},
  classification  = {C4240 (Programming and algorithm theory); C6110
                     (Systems analysis and programming); C6120 (File
                     organisation)},
  keywords        = {Distributed memory MIMD; Hypercube programs; Parallel
                     program behavior; Parallel virtual memory; PVM},
  thesaurus       = {Hypercube networks; Parallel programming; Virtual
                     storage},
}
%%% Conference paper (1990 ACM Conference on LISP and Functional
%%% Programming); carries its own booktitle in addition to the crossref.
@inproceedings{Feeley:1990:PVM,
  author          = {Marc Feeley and James S. Miller},
  booktitle       = {{Proceedings of the 1990 ACM Conference on LISP and
                     Functional Programming, Nice}},
  title           = {A parallel virtual machine for efficient {Scheme}
                     compilation},
  crossref        = {ACM:1990:PAC},
  publisher       = pub-ACM,
  address         = pub-ACM:adr,
  bookpages       = {????},
  pages           = {119--130},
  month           = jun,
  year            = {1990},
  bibdate         = {Wed Jan 24 04:51:56 MST 2001},
  bibsource       = {http://dblp.uni-trier.de/db/conf/lfp/lfp1990.html#FeeleyM90;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.acm.org/pubs/citations/proceedings/lfp/91556/p119-feeley/},
  abstract        = {Programs compiled by Gambit, our Scheme compiler,
                     achieve performance as much as twice that of the
                     fastest available Scheme compilers. Gambit is easily
                     ported, while retaining its high performance, through
                     the use of a simple virtual machine (PVM). PVM allows a
                     wide variety of machine-independent optimizations and
                     it supports parallel computation based on the future
                     construct. PVM conveys high-level information
                     bidirectionally between the machine-independent front
                     end of the compiler and the machine-dependent back end,
                     making it easy to implement a number of common back end
                     optimizations that are difficult to achieve for other
                     virtual machines. PVM is similar to many real computer
                     architectures and has an option to efficiently gather
                     dynamic measurements of virtual machine usage. These
                     measurements can be used in performance prediction for
                     ports to other architectures as well as design
                     decisions related to proposed optimizations and object
                     representations.},
  acknowledgement = ack-nhfb,
  affiliation     = {Brandeis Univ., Waltham, MA, USA},
  classification  = {C6150C (Compilers, interpreters and other
                     processors)},
  conflocation    = {Nice, France; 27--29 June 1990},
  corpsource      = {Brandeis Univ., Waltham, MA, USA},
  keywords        = {Gambit; Lisp; machine-independent; Machine-independent
                     front end; machine-independent front end;
                     Machine-independent optimizations; object; Object
                     representations; optimizations; parallel processing;
                     Parallel virtual machine; parallel virtual machine;
                     portability; program compilers; PVM portability;
                     representations; Scheme compiler; simple virtual
                     machine; Simple virtual machine; software; virtual
                     machines},
  oldlabel        = {FeeleyM90},
  sponsororg      = {ACM},
  thesaurus       = {Parallel processing; Program compilers; Software
                     portability; Virtual machines},
  treatment       = {P Practical},
  XMLdata         = {ftp://ftp.informatik.uni-trier.de/pub/users/Ley/bib/records.tar.gz#conf/lfp/FeeleyM90},
}
%%% Technical report ORNL/TM-11375; the note points to the journal version.
@techreport{Sunderam:1990:PFPa,
  author      = {V. S. Sunderam},
  title       = {{PVM}: a Framework for Parallel Distributed
                 Computing},
  number      = {ORNL/TM-11375},
  institution = "Dept. of Math and Computer Science, " # inst-EMORY,
  address     = inst-EMORY:adr,
  month       = feb,
  year        = {1990},
  bibsource   = {Distributed/dist.sys.1.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note        = {See also \cite{Sunderam:1990:PFPb}.},
  comment     = {Good overview of PVM, though now a little out of date.
                 Supports dynamic, location-transparent, process
                 initiation, typed message passing and shared memory,
                 broadcast and distributed synchronization, and
                 heterogeneity in the form of language- and
                 machine-independence, type conversion, and multiple
                 executables for each component. Seems to be heavily
                 dependent on broadcast. Shared memory is somewhat
                 limited. See also beguelin:concsuper. [David.Kotz at
                 Dartmouth.edu]},
  keyword     = {heterogeneous computing, distributed computing,
                 network parallel computing},
}
%%% Journal article, Concurrency: practice and experience 2(4), 1990; the
%%% note points back to the earlier technical report.
@article{Sunderam:1990:PFPb,
  author          = {V. S. Sunderam},
  title           = {{PVM}: a Framework for Parallel Distributed
                     Computing},
  journal         = j-CPE,
  volume          = {2},
  number          = {4},
  pages           = {315--339},
  month           = dec,
  year            = {1990},
  CODEN           = {CPEXEI},
  ISSN            = {1040-3108},
  ISSN-L          = {1040-3108},
  bibdate         = {Tue Sep 7 05:40:19 MDT 1999},
  bibsource       = {Distributed/clusters.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     Misc/IMMD_IV.bib},
  note            = {See also the earlier technical report
                     \cite{Sunderam:1990:PFPa}.},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
                     GA, USA},
  classification  = {C6115 (Programming support)},
  corpsource      = {Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
                     GA, USA},
  fjournal        = {Concurrency, practice and experience},
  keywords        = {algorithms; Algorithms; concurrent; Concurrent;
                     conditional execution; Conditional execution;
                     distributed processing; environment; environments;
                     error detection; Error detection; interface; Interface;
                     parallel distributed computing; Parallel distributed
                     computing; parallel programming; programming;
                     Programming environment; PVM system; sequential;
                     Sequential; virtual computing; Virtual computing
                     environment},
  pubcountry      = {UK},
  thesaurus       = {Distributed processing; Parallel programming;
                     Programming environments},
  treatment       = {P Practical},
}
%%% Journal article, Microprocessing and Microprogramming 32(1--5), 1991.
%%% The issue range and the conference dates are numeric ranges, so they
%%% use the en-dash form "--", as the pages field here and the
%%% conflocation of other entries already do.
@Article{Balou:1991:DIV,
  author =       "A. T. Balou and A. N. Refenes",
  title =        "The design and implementation of {VOOM}: a parallel
                 virtual object oriented machine",
  journal =      j-MICROPROC-MICROPROG,
  volume =       "32",
  number =       "1--5",
  pages =        "289--296",
  month =        aug,
  year =         "1991",
  CODEN =        "MMICDT",
  ISSN =         "0165-6074 (print), 1878-7061 (electronic)",
  ISSN-L =       "0165-6074",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220 (Computer architecture); C6110 (Systems
                 analysis and programming); C7430 (Computer
                 engineering)",
  conflocation = "Vienna, Austria; 2--5 Sept. 1991",
  conftitle =    "17th EUROMICRO Symposium on Microprocessing and
                 Microprogramming. Hardware and Software Design
                 Automation",
  corpsource =   "Dept. of Comput. Sci., Univ. Coll. London, UK",
  fjournal =     "Microprocessing and Microprogramming",
  keywords =     "design; execution unit; implementation; machine;
                 machines; memory management unit; memory recycling;
                 object management; object-oriented model;
                 object-oriented programming; packet-switching network;
                 parallel architecture; parallel architectures; parallel
                 virtual object oriented; pre-fetch unit; virtual",
  pubcountry =   "Netherlands",
  treatment =    "P Practical",
}
%%% Conference paper on graphical tools layered on PVM; proceedings data
%%% come from the cross-referenced entry.  The affiliation abbreviation is
%%% spelled "Nat. Lab." with its period, matching the same affiliation in
%%% Beguelin:1992:HGD.
@InProceedings{Beguelin:1991:GDT,
  author =       "Adam Beguelin and Jack J. Dongarra and A. Geist and
                 Robert Manchek and V. S. Sunderam",
  title =        "Graphical Development Tools for Network-Based
                 Concurrent Supercomputing",
  crossref =     "IEEE:1991:PSA",
  pages =        "435--444",
  year =         "1991",
  bibdate =      "Sun Dec 22 10:17:16 MST 1996",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Distributed/clusters.bib;
                 http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Oak Ridge Nat. Lab., TN, USA",
  classification = "C6110P (Parallel programming); C6115 (Programming
                 support); C6180G (Graphical user interfaces)",
  comment =      "Clusters of workstations solving supercomputing
                 problems. This is a graphical front-end to PVM, that
                 allows the user to specify a set of subroutines, their
                 parameters and output values, and the dependencies
                 between them. It can compile the parts on multiple
                 machines. At run time it chooses where to execute each
                 module, and when, based on the dependencies and on a
                 user-supplied cost matrix showing the cost of running
                 each module in each place. See also beguelin:hence.
                 [David.Kotz at Dartmouth.edu]",
  keyword =      "network supercomputing, distributed computing",
  keywords =     "Application program; Graphical development tools;
                 HeNCE; Heterogeneous network computing environment;
                 Integrated graphical tools; Network-based concurrent
                 supercomputing; Parallel programs; Parallel Virtual
                 Machine; Process management and communication; PVM;
                 Software package; X-window-based software environment",
  thesaurus =    "Graphical user interfaces; Parallel programming;
                 Programming environments",
}
%%% Technical report ORNL/TM-11826 (PVM user's guide), September 1991.
@techreport{Beguelin:1991:UGP,
  author      = {A. Beguelin and J. Dongarra and A. Geist and R.
                 Manchek and V. Sunderam},
  title       = {A User's guide to {PVM}: Parallel virtual machine},
  type        = {Technical Report},
  number      = {ORNL/TM-11826},
  institution = {Mathematical Sciences Section, Oak Ridge National
                 Laboratory},
  address     = inst-ORNL:adr,
  month       = sep,
  year        = {1991},
  bibsource   = {Distributed/clusters.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 Parallel/par.lin.alg.bib; Theory/Matrix.bib},
  keywords    = {prll, operating system},
}
%%% Conference paper on LU factorization over a workstation network;
%%% proceedings data come from the cross-referenced entry.
%%% NOTE(review): the third author's surname was changed from "Guijn" to
%%% "Geijn" on the assumption that this is R. A. van de Geijn -- confirm
%%% against the printed proceedings before relying on it.
@InProceedings{Benzoni:1991:MFR,
  author =       "A. Benzoni and V. S. Sunderam and R. van de Geijn",
  title =        "Matrix factorization on a {RISC} workstation network",
  crossref =     "Durand:1991:HPC",
  pages =        "207--218",
  year =         "1991",
  bibdate =      "Sun Dec 22 10:17:16 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "IBM ECSEC, Roma, Italy",
  classification = "C4140 (Linear algebra); C5220 (Computer
                 architecture); C5470 (Performance evaluation and
                 testing); C5620L (Local area networks)",
  keywords =     "20 To 60 MFLOPS; Concurrent process management; Dense
                 matrix; Distributed memory architecture; Distributed
                 programming environment; Ethernet; Heterogeneous
                 distributed computing environment; High-speed network;
                 Independent processing units; LU factorization;
                 Numerically intensive applications; Optical fiber link;
                 PVM; RISC System/6000 workstations; RISC workstation
                 network; Synchronization; Token Ring local area
                 network",
  numericalindex = "Computer speed 2.0E+07 to 6.0E+07 FLOPS",
  thesaurus =    "Distributed processing; Local area networks; Matrix
                 algebra; Optical links; Performance evaluation",
}
%%% Manual (PVM users' guide), July 1991.  The month uses the lowercase
%%% three-letter macro, like every other entry in this file, so tools that
%%% match macro names case-sensitively resolve it as well.
@Manual{Dongarra:1991:UGP,
  author =       "Jack Dongarra and others",
  title =        "A Users' Guide to {PVM} Parallel Virtual Machine",
  organization = inst-ORNL,
  address =      inst-ORNL:adr,
  month =        jul,
  year =         "1991",
  bibsource =    "Distributed/Dist.Sys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
}
%%% Technical report ORNL/TM-11760, January 1991.
@techreport{Geist:1991:ENB,
  author      = {G. A. Geist and V. S. Sunderam},
  title       = {Experiences with network based concurrent computing on
                 the {PVM} system},
  number      = {ORNL/TM-11760},
  institution = inst-ORNL,
  address     = inst-ORNL:adr,
  month       = jan,
  year        = {1991},
  bibsource   = {Distributed/clusters.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib},
}
%%% Conference paper; proceedings data come from the cross-referenced entry.
@inproceedings{Geist:1991:PSS,
  author    = {G. A. Geist and V. S. Sunderam},
  title     = {The {PVM} System: {Supercomputer} Level Concurrent
               Computation on a Heterogeneous Network of
               Workstations},
  crossref  = {Stout:1991:SDM},
  pages     = {258--261},
  year      = {1991},
  bibsource = {Distributed/dist.sys.1.bib;
               http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  comment   = {A more up-to-date, but shorter, overview of PVM and
               its performance than sunderam:pvm. Good performance on
               networks of IBM RS/6000s. [David.Kotz at
               Dartmouth.edu]},
  keyword   = {distributed heterogeneous computing},
}
%%% Journal article, Programmirovanie 17(1), 1991; the note cites an
%%% English translation.
@article{Meleshchuk:1991:IPP,
  author          = {S. B. Meleshchuk and A. N. Nedumov},
  title           = {Implementation of a protocol for parallel database
                     access with virtual machine communications facilities},
  journal         = j-PROGRAMMIROVANIE,
  volume          = {17},
  number          = {1},
  pages           = {35--42},
  month           = jan # "\slash " # feb,
  year            = {1991},
  CODEN           = {PCSODA},
  ISSN            = {0132-3474, 0361-7688},
  bibdate         = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {English translation in Programming and Computer
                     Software, vol. 17, no. 1, pp. 27--32, November 1991.},
  acknowledgement = ack-nhfb,
  classification  = {C6150J (Operating systems)},
  corpsource      = {Leningrad Techn. State Univ., USSR},
  fjournal        = {Programmirovanie},
  keywords        = {COMMIT protocol; concurrency control; deadlock;
                     electronic mail; interrupts; IUCV mail facility;
                     machines; parallel; parallel database access;
                     processing; protocol; protocols; virtual; virtual
                     machines},
  pubcountry      = {USSR},
  treatment       = {P Practical},
}
%%% Conference paper; proceedings data come from the cross-referenced entry.
@inproceedings{Nagaraj:1991:MHL,
  author          = {U. Nagaraj and U. S. Shukla},
  title           = {{MK}: a high level interface for message passing},
  crossref        = {Bhavsar:1991:SSJ},
  pages           = {493--502},
  year            = {1991},
  bibdate         = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C6150J (Operating systems)},
  corpsource      = {Centre for Dev. of Adv. Comput., Bangalore, India},
  keywords        = {communication interface; high level interface;
                     interconnection network technology; message passing
                     multicomputer; MK; network operating systems;
                     programming environment; software interfaces;
                     transputer network},
  treatment       = {P Practical},
}
%%% Journal article, Concurrency: practice and experience 3(6), 1991.
@article{Saltz:1991:MRT,
  author          = {J. Saltz and H. Berryman and J. Wu},
  title           = {Multiprocessors and Run-time Compilation},
  journal         = j-CPE,
  volume          = {3},
  number          = {6},
  pages           = {573--592},
  month           = dec,
  year            = {1991},
  CODEN           = {CPEXEI},
  ISSN            = {1040-3108},
  ISSN-L          = {1040-3108},
  bibdate         = {Tue Sep 7 05:40:19 MDT 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency, practice and experience},
}
@MastersThesis{Al-Salman:1992:DIP,
author = "Abdulmalik Salman Al-Salman",
title = "Design and implementation of a profiler for the
Parallel Virtual Machine ({PVM}) system",
type = "M.S. thesis",
school = inst-UGA,
address = inst-UGA:adr,
pages = "vi + 51",
year = "1992",
bibdate = "Mon Jan 15 16:37:21 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Directed by Steven C. Cater.",
acknowledgement = ack-nhfb,
}
@InProceedings{Alfano:1992:DNA,
author = "M. Alfano and G. {Lo Re}",
title = "Distributing numerical algorithms: some experiences
with network computing system ({NCS}) and parallel
virtual machine ({PVM})",
crossref = "SCRI:1992:PWC",
year = "1992",
bibsource = "Distributed/clusters.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
}
@InProceedings{Beguelin:1992:HGD,
author = "A. Beguelin and J. Dongarra and A. Geist and R.
Manchek and K. Moore and R. Wade and V. Sunderam",
title = "{HeNCE}: graphical development tools for network-based
concurrent computing",
crossref = "IEEE:1992:PSH",
pages = "129--136",
year = "1992",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Oak Ridge Nat. Lab., TN, USA",
classification = "C6110P (Parallel programming); C6115 (Programming
support); C6130B (Graphics techniques); C6150C
(Compilers, interpreters and other processors); C6180G
(Graphical user interfaces)",
keywords = "Distributed virtual computer; Graphical development
tools; Graphical interface; Graphical parallel
programming environment; HeNCE; Heterogeneous machines;
Heterogeneous network computing environment;
Network-based concurrent computing; Program compiler;
Program debugging; PVM; Unix workstation; X Window",
thesaurus = "Graphical user interfaces; Parallel programming;
Program compilers; Program debugging; Programming
environments; Software tools",
}
@Article{Beguelin:1992:PHT,
author = "A. Beguelin and J. Dongarra and A. Geist and R.
Manchek and V. Sunderam",
title = "{PVM} and {HeNCE}: traversing the parallel
environment",
journal = j-CRAY-CHANNELS,
volume = "14",
number = "4",
pages = "22--25",
month = "Fall",
year = "1992",
CODEN = "CRCHE8",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Carnegie-Mellon Univ., Pittsburgh, PA, USA",
classification = "C5440 (Multiprocessor systems and techniques);
C6110P (Parallel programming); C6150N (Distributed
systems); C7430 (Computer engineering)",
corpsource = "Carnegie-Mellon Univ., Pittsburgh, PA, USA",
fjournal = "CRAY Channels",
keywords = "cost-effective use; Cost-effective use; Cray Research
MPP systems; diverse architectures; Diverse
architectures; diverse computer systems; Diverse
computer systems; HeNCE; Heterogeneous Network
Computing Environment; heterogeneous networks;
Heterogeneous networks; Machine; network operating
systems; networked resources; Networked resources;
packages; parallel; parallel machines; Parallel
Virtual; Parallel Virtual Machine; portability;
Portability; programming; PVM; software; software
packages; Software packages; virtual machines",
thesaurus = "Network operating systems; Parallel machines; Parallel
programming; Software packages; Virtual machines",
treatment = "P Practical; R Product Review",
}
@InProceedings{Beguelin:1992:SCG,
author = "A. Beguelin and J. Dongarra and A. Geist and R.
Manchek and V. Sunderam",
title = "Solving computational grand challenges using a network
of heterogeneous supercomputers",
crossref = "Dongarra:1992:PFS",
pages = "596--601",
year = "1992",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Oak Ridge Nat. Lab., Tennessee Univ., Knoxville, TN,
USA",
classification = "C5440 (Multiprocessor systems and techniques);
C5620W (Other networks); C6110P (Parallel programming);
C6115 (Programming support); C7430 (Computer
engineering)",
keywords = "Computational grand challenges; Cray XMP; Flexibility;
High speed network; Intel iPSC/860; Network of
heterogeneous supercomputers; Parallel virtual machine;
Thinking Machines CM2; Virtual computer",
thesaurus = "Parallel processing; Parallel programming; Programming
environments; Virtual machines; Wide area networks",
}
@TechReport{Beguelin:1992:XTM,
author = "Adam Louis Beguelin",
title = "Xab: a tool for monitoring {PVM} programs",
institution = inst-SCS-CMU,
address = inst-SCS-CMU:adr,
day = "5",
month = jun,
year = "1992",
bibsource = "Distributed/clusters.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
}
@InProceedings{Benzoni:1992:CLF,
author = "A. Benzoni and G. Richelli and V. S. Sunderam",
title = "Concurrent {LU} factorization on workstation
networks",
crossref = "Evans:1992:PCP",
pages = "159--166",
year = "1992",
bibdate = "Sun Dec 22 10:17:16 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "IBM ECSEC, Roma, Italy",
classification = "B0290H (Linear algebra); B6210L (Computer
communications); B6260 (Optical links and equipment);
C4140 (Linear algebra); C4240P (Parallel programming
and algorithm theory); C5620L (Local area networks)",
keywords = "6 MByte/s; Concurrent LU factorization; Dense matrix;
Ethernet network; Fiber optic links; IBM RISC
System/6000 workstations; Optical fiber links; PVM
software system; Workstation networks",
numericalindex = "Byte rate 6.0E+06 Byte/s",
thesaurus = "Local area networks; Matrix algebra; Optical links;
Parallel algorithms; Workstations",
}
@TechReport{Dongarra:1992:PUL,
author = "Jack J. Dongarra and Rolf Hempel and Anthony J. G. Hey
and David W. Walker",
title = "A Proposal for a User-Level Message-Passing Interface
in a Distributed Memory Environment",
type = "Technical Report",
number = "TM-12231",
institution = inst-ORNL,
address = inst-ORNL:adr,
month = oct,
year = "1992",
bibdate = "Tue Feb 26 10:10:44 2002",
bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/Par.Arch.Indep.bib;
http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
}
@InProceedings{Duval:1992:TPP,
author = "D. Duval",
title = "Trends in parallel programming models for high
performance computers",
crossref = "Ferenczi:1992:AHW",
pages = "33",
year = "1992",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Parallel Comput. Div., TELMAT Inf., Soultz, France",
classification = "C4240P (Parallel programming and algorithm theory);
C6110P (Parallel programming)",
keywords = "CS-Tools; F90; Heterogeneous scalable networks; High
Performance Fortran; Massively parallel machines;
Neural coprocessor; Parallel programming models;
PARMACS; PVM; Scientific applications; SHAPES ASI;
SPMD; Superscalar; Transputers; Vector facilities",
thesaurus = "Parallel programming; Programming theory; Software
engineering",
}
@InProceedings{Eppstein:1992:PGC,
author = "Margaret J. Eppstein and Joseph F. Guarnaccia and
David Emery Dougherty and Robert S. Kerr",
title = "Parallel groundwater computations using {PVM}",
crossref = "Russell:1992:CMW",
pages = "713--720",
year = "1992",
bibdate = "Mon Jan 15 15:32:53 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
annote = "Caption title. Published in Computational methods in
water resources IX, vol. 1, Numerical methods in water
resources. EPA/600/A-92/157 PB92-206572. Microfiche.
Springfield, VA: National Technical Information
Service, [1992]. 1 microfiche: negative.",
keywords = "Groundwater flow --- Computer programs",
}
@Book{Freeman:1992:PNA,
author = "T. L. (Len) Freeman and C. (Christopher) Phillips",
title = "Parallel numerical algorithms",
publisher = pub-PHI,
address = pub-PHI:adr,
pages = "xii + 315",
year = "1992",
ISBN = "0-13-651597-5",
ISBN-13 = "978-0-13-651597-5",
LCCN = "QA76.9.A43 F74 1992",
bibdate = "Mon Oct 07 09:13:23 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Chapter 5 discusses HPF and PVM.",
price = "US\$40.00",
series = "Prentice Hall International Series in Computer
Science",
acknowledgement = ack-nhfb,
}
@Article{Geist:1992:NBC,
author = "G. A. Geist and V. S. Sunderam",
title = "Network-based Concurrent Computing on the {PVM}
System",
journal = j-CPE,
volume = "4",
number = "4",
pages = "293--311",
month = jun,
year = "1992",
CODEN = "CPEXEI",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "Distributed/clusters.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
classification = "C5440 (Multiprocessor systems and techniques);
C6180G (Graphical user interfaces); C7430 (Computer
engineering)",
corpsource = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
fjournal = "Concurrency, practice and experience",
keywords = "Computational resource; computational resource;
Concurrent computing environment; concurrent computing
environment; coupled networks; graphical interface;
graphical user interfaces; interactive; Interactive
graphical interface; loosely; Loosely coupled networks;
machines; Multiprocessing; multiprocessing; parallel
processing; Parallel Virtual Machine; Performance;
performance; Porting; porting; PVM system; Software
package; software package; virtual",
pubcountry = "UK",
thesaurus = "Graphical user interfaces; Parallel processing;
Virtual machines",
treatment = "P Practical",
}
@TechReport{Gropp:1992:TIM,
author = "Bill Gropp and Ewing Lusk",
title = "A test implementation of the {MPI} draft
message-passing standard",
institution = inst-ANL-mcs,
address = inst-ANL:adr,
year = "1992",
bibsource = "Distributed/clusters.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
}
@InProceedings{Leon:1992:FP,
author = "Juan Leon and Allan L. Fisher and Peter Steenkiste",
title = "Fail-safe {PVM}",
crossref = "SCRI:1992:PWC",
year = "1992",
bibsource = "Distributed/clusters.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
}
@Article{Majumdar:1992:PPC,
author = "A. Majumdar and W. R. Martin",
title = "Parallel preconditioned conjugate gradient algorithm
applied to neutron diffusion problem",
journal = j-TRANS-AM-NUCL-SOC,
volume = "65",
pages = "209--210",
year = "1992",
CODEN = "TANSAO",
ISSN = "0003-018X",
bibdate = "Sun Dec 22 10:17:16 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Michigan Univ., Ann Arbor, MI, USA",
classification = "A0260 (Numerical approximation and analysis); A2820H
(Neutron diffusion); A2841C (Computer codes); C4130
(Interpolation and function approximation); C4240P
(Parallel programming and algorithm theory); C7470
(Nuclear engineering)",
fjournal = "Transactions of the American Nuclear Society",
keywords = "BBN TC2000; Distributed workstation; IBM RS6000;
Iterative method; Linear system; Neutron diffusion;
Parallel PCG algorithm; Parallel virtual machine;
Parallelization software; Preconditioned conjugate
gradient; Shared memory machine",
thesaurus = "Iterative methods; Neutron diffusion; Nuclear
engineering computing; Parallel algorithms",
}
@InProceedings{McRae:1992:VC,
author = "S. J. McRae",
title = "{VM} communications",
crossref = "Anonymous:1992:PSE",
pages = "439--453",
year = "1992",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Soft-Switch Ltd., Reading, UK",
classification = "C6150J (Operating systems); C6155 (Computer
communications software)",
keywords = "3270 Protocols; APPC; Business needs; Client/server
communications; Communication offerings; Communications
infra-structure; IBM host system; LANRES; MVS; OSI
connectivity; PVM; RSCS; SAA communications strategy;
SNA connectivity; TCP/IP; TCP/IP connectivity; VM;
VM/ESA; X.25 communications",
thesaurus = "Computer communications software; Operating systems
[computers]",
}
@InProceedings{Otto:1992:MAP,
author = "S. W. Otto and M. Wolfe",
title = "The {MetaMP} approach to parallel programming",
crossref = "Siegel:1992:FFS",
pages = "562--565",
year = "1992",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Oregon Graduate Inst., Beaverton, OR, USA",
classification = "C6110P (Parallel programming); C6140D (High level
languages)",
keywords = "MetaMP; Parallel programming",
thesaurus = "High level languages; Parallel programming",
}
@InProceedings{Shen:1992:VTD,
author = "S. Shen and L. Kleinrock",
title = "The virtual-time data-parallel machine",
crossref = "Siegel:1992:FSF",
pages = "46--53",
year = "1992",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5440 (Multiprocessor systems and techniques);
C6110P (Parallel programming)",
conflocation = "McLean, VA, USA; 19-21 Oct. 1992",
corpsource = "Dept. of Comput. Sci., California Univ., Los Angeles,
CA, USA",
keywords = "asynchronous execution; computation-intensive
data-parallel; FIFO priority cache; parallel machines;
parallel programming; processing element; programs;
SIMD; single instruction multiple data; virtual-time
data-parallel machine",
sponsororg = "IEEE; NASA",
treatment = "P Practical",
}
@InProceedings{Sunderam:1992:CCP,
author = "Vaidy Sunderam",
title = "Concurrent Computing with {PVM}",
crossref = "SCRI:1992:PWC",
year = "1992",
bibsource = "Distributed/clusters.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
}
@InProceedings{Wolbers:1992:SPP,
author = "S. Wolbers",
title = "Software for parallel processing applications",
crossref = "Verkerk:1992:PIC",
pages = "111--116",
year = "1992",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Fermilab, Batavia, IL, USA",
classification = "A2980 (Nuclear information processing); C6110P
(Parallel programming); C7320 (Physics and Chemistry)",
keywords = "ACPMAPS; CANOPY; Cooperative processes software;
High-energy physics; Lattice QCD; Monte Carlo
generation; Offline event reconstruction; Parallel
processing; Tightly-coupled machines; Workstation
clusters",
thesaurus = "Monte Carlo methods; Parallel programming; Physics
computing",
}
@Article{Almasi:1993:PDS,
author = "G. S. Almasi and T. McLuckie and J. Bell and A.
Gordon",
title = "Parallel distributed seismic migration",
journal = j-CPE,
volume = "5",
number = "2",
pages = "105--131",
month = apr,
year = "1993",
CODEN = "CPEXEI",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "IBM Thomas J. Watson Res. Center, Yorktown Heights,
NY, USA",
classification = "A9130 (Seismology); C5440 (Multiprocessor systems
and techniques); C7340 (Geophysics)",
fjournal = "Concurrency, practice and experience",
keywords = "15 MFLOPS; Ethernet; IBM RISC/6000 workstations;
Linda; Parallel distributed seismic migration;
Performance; Programming models; PVM; Remote procedure
calls; Token ring",
numericalindex = "Computer speed 1.5E+07 FLOPS",
pubcountry = "UK",
thesaurus = "Geophysics computing; Parallel processing;
Seismology",
}
@Article{Altevogt:1993:PTD,
author = "P. Altevogt and A. Linke",
title = "Parallelization of the two-dimensional {Ising} model
on a cluster of {IBM RISC System\slash 6000}
workstations",
journal = j-PARALLEL-COMPUTING,
volume = "19",
number = "9",
pages = "1041--1052",
month = sep,
year = "1993",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Sci. Center, IBM, Heidelberg, Germany",
classification = "A0550 (Lattice theory and statistics; Ising
problems); C5220P (Parallel architecture); C7320
(Physics and Chemistry)",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
keywords = "IBM RISC System/6000 workstations; Metropolis
algorithm; Multispin coding; NSC DX Router; PVM
programming environment; Token ring; Two-dimensional
Ising model",
pubcountry = "Netherlands",
thesaurus = "Ising model; Physics computing; Reduced instruction
set computing",
}
@Article{Anonymous:1993:MMP,
author = "Anonymous",
title = "{MPI}: a message passing interface",
journal = j-PROC-SUPERCOMPUT,
pages = "878--883",
month = "????",
year = "1993",
CODEN = "????",
ISBN = "0-8186-4340-4",
ISBN-13 = "978-0-8186-4340-8",
ISSN = "1063-9535",
LCCN = "QA76.5 .S894 1993",
bibdate = "Fri May 24 09:57:40 MDT 1996",
bibsource = "Compendex database;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "This paper presents an overview of MPI, a proposed
standard message passing interface for MIMD distributed
memory concurrent computers. The design of MPI has been
a collective effort involving researchers in the United
States and Europe from many organizations and
institutions. MPI includes point-to-point and
collective communication routines, as well as support
for process groups, communication contexts, and
application topologies. While making use of new ideas
where appropriate, the MPI standard is based largely on
current practice.",
acknowledgement = ack-nhfb,
classification = "723; 902.2; C6150N (Distributed systems software)",
conference = "Proceedings of the Supercomputing '93 Conference",
conferenceyear = "1993",
fjournal = "Proceedings of the Supercomputing Conference",
journalabr = "Proc Supercomputing Conf",
keywords = "Application topologies; application topologies;
collective communication routines; Collective
communication routines; Communication contexts;
communication contexts; Distributed computer systems;
message passing; Message passing interface; MIMD
distributed memory concurrent computers; MPI; MPI
standard; MPI standard overview; Point-to-point
communication; point-to-point communication; process
groups; Process groups; software standards; standard
message passing interface; Standard message passing
interface; Standards",
meetingaddress = "Portland, OR, USA",
meetingdate = "Nov 15--19 1993",
meetingdate2 = "11/15--19/93",
publisherinfo = "Computer Society Press",
sponsor = "IEEE Computer Society; ACM SIGARCH",
sponsororg = "IEEE; ACM SIGARCH",
treatment = "P Practical",
}
@Article{Anonymous:1993:MPI,
author = "Anonymous",
title = "Message-Passing Interface",
journal = j-IJSA,
volume = "7",
number = "2",
pages = "179--179",
month = jun,
year = "1993",
CODEN = "IJSAE9",
DOI = "https://doi.org/10.1177/109434209300700208",
ISSN = "0890-2720",
bibdate = "Tue Nov 6 11:28:49 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://journals.sagepub.com/doi/pdf/10.1177/109434209300700208",
acknowledgement = ack-nhfb,
fjournal = "The International Journal of Supercomputer
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@TechReport{Arthur:1993:CUA,
author = "Trey Arthur and Michael J. Bockelie",
title = "A comparison of using {APPL} and {PVM} for a parallel
implementation of an unstructured grid generation
problem",
number = "NASA CR-191425",
institution = "National Aeronautics and Space Administration, Langley
Research Center; National Technical Information
Service, distributor",
address = "Hampton, VA, USA",
pages = "??",
year = "1993",
bibdate = "Mon Jan 15 15:32:53 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "NASA contractor report",
acknowledgement = ack-nhfb,
annote = "Distributed to depository libraries in microfiche.
Shipping list no.: 93-1026-M. Microfiche. [Washington,
DC: National Aeronautics and Space Administration,
1993] 1 microfiche.",
govtdocnumber = "NAS 1.26:191425 0830-H-14 (MF)",
keywords = "Numerical grid generation (Numerical analysis)",
}
@InProceedings{Arthur:1993:PIU,
author = "T. Arthur and M. Bockelie",
title = "A Parallel Implementation of the Unstructured Grid
Generation Program {VGRIDSG} Using {PVM} and {APPL}",
crossref = "Sincovec:1993:SCP",
pages = "899--902",
year = "1993",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Baiardi:1993:PVM,
author = "F. Baiardi and M. Jazayeri",
title = "{P$^3$M}: a Virtual Machine Approach to Massively
Parallel Computing",
journal = j-PROC-INT-CONF-PAR-PROC,
pages = "I-340--??",
month = "????",
year = "1993",
CODEN = "PCPADL",
ISSN = "0190-3918",
LCCN = "QA76.6.I548a",
bibdate = "Mon Jan 15 15:32:53 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Proceedings of the International Conference on
Parallel Processing",
}
@InProceedings{Baraglia:1993:PWC,
author = "R. Baraglia and D. Laforenza and R. Perego",
title = "Programming a workstation cluster with {PVM} and
{Linda}: a qualitative and quantitative comparison",
crossref = "Anonymous:1993:ISA",
pages = "101--114",
year = "1993",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Barth:1993:CNM,
author = "N. H. Barth and S. L. Smith",
title = "Coupling Numerical Models of the Atmosphere and Ocean
Using the Parallel Virtual Machine ({PVM}) Package",
crossref = "Sincovec:1993:SCP",
pages = "71--75",
year = "1993",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Bedrosian:1993:MFA,
author = "G. Bedrosian and R. W. Benway",
title = "Magnetostatic finite-element analysis on {MIMD\slash
DMMP} parallel computers",
crossref = "Yelon:1993:PTS",
journal = j-J-APPL-PHYS,
volume = "73",
number = "10",
pages = "6772--6777",
year = "1993",
CODEN = "JAPIAU",
ISSN = "0021-8979 (print), 1089-7550 (electronic), 1520-8850",
ISSN-L = "0021-8979",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "GE Corporate Research and Development, Schenectady,
NY, USA",
classification = "A0260 (Numerical approximation and analysis); A4110D
(Electrostatics, magnetostatics); B0290T (Finite
element analysis); B5120 (Magnetostatics)",
fjournal = "Journal of Applied Physics",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=4915369",
keywords = "Distributed-memory; H3D; In-house magnetostatic
finite-element analysis code; Intel iPSC/860 Hypercube;
Local area network; Message-passing; MIMD/DMMP parallel
computers; Multiple closely coupled CPUs;
Multiple-data; Multiple-instruction; Networks of
heterogeneous workstations; Parallel virtual machine;
Porting; Supercomputers",
thesaurus = "Finite element analysis; Magnetic fields",
}
@InProceedings{Beguelin:1993:PEC,
author = "A. Beguelin and J. Dongarra and A. Geist and R.
Manchek and S. Otto and J. Walpole",
title = "{PVM}: {Experiences}, current status and future
direction",
crossref = "IEEE:1993:PSP",
pages = "765--766",
year = "1993",
bibdate = "Thu Apr 16 08:51:18 1998",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Sch. of Comput. Sci., Carnegie Mellon Univ.,
Pittsburgh, PA, USA",
classification = "C6110P (Parallel programming); C6150N (Distributed
systems software); C7320 (Physics and chemistry
computing); C7410D (Electronic engineering computing)",
corpsource = "Sch. of Comput. Sci., Carnegie Mellon Univ.,
Pittsburgh, PA, USA",
keywords = "circuit analysis; Circuit analysis; computational
problems; Computational requirements; computational
requirements; computing; computing requirements;
Computing requirements; concurrent; Concurrent
computing; concurrent computing; concurrent processing;
Concurrent processing; electronic engineering
computing; engineering design; Engineering design;
Hardware multiprocessors; hardware multiprocessors;
high-; High-performance applications; high-performance
applications; Integration aspects; integration aspects;
material sciences; Material sciences; multiprocessing
programs; package; parallel processors; Parallel
processors; parallel programming; parallel virtual
machine; Parallel virtual machine; performance
applications; Physical sciences; physical sciences;
physics computing; PVM; scientific; Scientific
computational problems; scientific computational
problems; Simulation; simulation; software; Software
package; software package; software packages",
sponsororg = "IEEE; ACM SIGARCH",
treatment = "P Practical",
}
@InCollection{Beguelin:1993:PHT,
author = "A. Beguelin and J. Dongarra and A. Geist and R.
Manchek and K. Moore and V. Sunderam",
editor = "J. S. Kowalik and L. Grandinetti",
title = "{PVM} and {HeNCE}: Tools for Heterogeneous Network
Computing",
crossref = "Kowalik:1993:SPC",
pages = "??--??",
year = "1993",
bibdate = "Tue Feb 26 10:10:44 2002",
bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/Par.Arch.Indep.bib;
http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
Parallel/Par.Arch.Indep.bib",
acknowledgement = ack-nhfb,
}
@Article{Beguelin:1993:VDH,
author = "Adam Beguelin and Jack Dongarra and Al Geist and V.
Sunderam",
title = "Visualization and Debugging in a Heterogeneous
Environment",
journal = j-COMPUTER,
volume = "26",
number = "6",
pages = "88--95",
month = jun,
year = "1993",
CODEN = "CPTRB4",
ISSN = "0018-9162 (print), 1558-0814 (electronic)",
ISSN-L = "0018-9162",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover
library database",
abstract = "A monitoring tool and a graphical interface working on
top of the PVM software can help programmers make
better use of heterogeneous networks of computers.",
acknowledgement = ack-nhfb,
affiliation = "Sch. of Comput. Sci., Carnegie Mellon Univ.,
Pittsburgh, PA, USA",
classification = "C6115 (Programming support); C6150G (Diagnostic,
testing, debugging and evaluating systems); C6150N
(Distributed systems)",
fjournal = "Computer",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2",
keywords = "Debugging; Graphical monitoring package; Graphical
programming environment; Hence; Heterogeneous
distributed programs; Heterogeneous environment;
Parallel virtual machine; Program visualisation; Xab",
thesaurus = "Multiprocessing programs; Open systems; Parallel
programming; Program debugging; Software tools; System
monitoring; Virtual machines; Visual programming",
}
@InProceedings{Beguelin:1993:XAT,
author = "Adam Beguelin",
title = "Xab: a Tool for Monitoring {PVM} Programs",
crossref = "IEEE:1993:WHP",
pages = "92--97",
year = "1993",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
Parallel/debug_3.1.bib",
}
@TechReport{Beguelin:1993:XTMa,
author = "Adam L. Beguelin",
title = "Xab: a tool for monitoring {PVM} programs",
type = "Research paper",
number = "CMU-CS-93-164",
institution = inst-SCS-CMU,
address = inst-SCS-CMU:adr,
pages = "8",
year = "1993",
bibdate = "Mon Jan 15 15:32:53 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
Techreports/tr.misc.bib",
abstract = "Xab (X-window Analysis and deBugging) is a tool for
run time monitoring of PVM (Parallel Virtual Machine)
programs. PVM supports the programming of a network of
heterogeneous computers as a single parallel computer.
Using Xab, PVM programs can easily be instrumented and
monitored. Xab uses PVM to monitor PVM programs. This
makes Xab very portable but it leads to interesting
issues of how to make Xab peacefully coincide with the
programs it monitors. Xab consists of three main
components, a user library, a monitoring program, and
an X windows front end. The user library provides
instrumented versions of the PVM calls. The monitoring
program runs as a PVM process and gathers monitor
events in the form of PVM messages. The Xab front end
displays information graphically about PVM processes
and messages. This paper discusses the design,
implementation, and use of the Xab tool. Related work
is briefly presented and contrasted with the approach
taken with Xab. How Xab works and how it is used are
discussed in detail. Finally, the current status of Xab
is presented along with future directions of where the
research may go from here.",
acknowledgement = ack-nhfb,
annote = "This paper also appears in the proceedings of the
April 1993 Workshop on Heterogeneous Processing, IEEE
Computer Society Press. June 2, 1993.",
keywords = "Debugging in computer science; Parallel programming
(Computer science)",
}
@InProceedings{Beguelin:1993:XTMb,
author = "A. L. Beguelin",
title = "Xab: a tool for monitoring {PVM} programs",
crossref = "Mudge:1993:PTS",
volume = "2",
pages = "102--103 (vol. 2) (or 4--??)",
year = "1993",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Sch. of Comput. Sci., Carnegie Mellon Univ.,
Pittsburgh, PA, USA",
classification = "C6115 (Programming support); C6150G (Diagnostic,
testing, debugging and evaluating systems); C7430
(Computer engineering)",
corpsource = "Sch. of Comput. Sci., Carnegie Mellon Univ.,
Pittsburgh, PA, USA",
keywords = "Feedback; feedback; Heterogeneity; heterogeneity;
heterogeneous; Heterogeneous multiprogramming
environment; Monitoring PVM programs; monitoring PVM
programs; multiprogramming; multiprogramming
environment; Parallel virtual machine; parallel virtual
machine; performance evaluation; program testing; Run
time monitoring tool; run time monitoring tool;
software tools; virtual machines; Xab",
sponsororg = "ACM; IEEE",
thesaurus = "Multiprogramming; Performance evaluation; Program
testing; Software tools; Virtual machines",
treatment = "P Practical",
}
@InProceedings{Castro-Leon:1993:MCP,
author = "E. Castro-Leon",
title = "A model of computation with parallel solvers",
crossref = "Anonymous:1993:SEC",
pages = "189--198",
year = "1993",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Intel Supercomputer Syst. Div., Beaverton, OR, USA",
classification = "C6110P (Parallel programming); C6115 (Programming
support)",
keywords = "Data parallel programming; Distributed memory
computers; Message passing; Parallel libraries;
Parallel solvers; Performance; Programming
environments; Rehosting",
thesaurus = "Distributed memory systems; Parallel programming;
Programming environments",
}
@MastersThesis{Cavender:1993:APV,
author = "Mark Edward Cavender",
title = "Asynchronous parallel virtual machine",
type = "M.S. thesis",
school = "University of Texas at San Antonio. Division of
Mathematics and Computer Science and Statistics",
address = "San Antonio, TX, USA",
pages = "vi + 228",
year = "1993",
bibdate = "Mon Jan 15 18:16:25 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
keywords = "Parallel processing (Electronic computers); Virtual
computer systems",
}
@inproceedings{Chandrasekharan:1993:RTB,
  author = {N. Chandrasekharan and V. Goel},
  title = {Ray tracing and binary tree computations using {PVM}},
  crossref = {Mudge:1993:PTS},
  volume = {2},
  pages = {104--105 (vol. 2)},
  year = {1993},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Comput. Sci., Univ. of Central Florida,
    Orlando, FL, USA},
  classification = {C6130B (Graphics techniques); C6150J (Operating
    systems); C7430 (Computer engineering)},
  corpsource = {Dept. of Comput. Sci., Univ. of Central Florida,
    Orlando, FL, USA},
  keywords = {Binary tree computations; binary tree computations;
    Computational problems; computational problems;
    Parallel virtual machine; parallel virtual machine;
    problem; PVM; ray; Ray tracing; ray tracing; rendering
    (computer graphics); rendering computer; Rendering
    computer synthesized images; scheduling; Scheduling
    technique; scheduling technique; synthesized images;
    tracing; tree contraction; Tree contraction problem;
    virtual machines},
  sponsororg = {ACM; IEEE},
  thesaurus = {Ray tracing; Rendering [computer graphics];
    Scheduling; Virtual machines},
  treatment = {A Application; P Practical},
}
@article{Chatterjee:1993:GLA,
  author = {S. Chatterjee and J. R. Gilbert and F. J. E. Long and
    R. Schreiber and S.-H. Teng},
  title = {Generating local addresses and communication sets for
    data-parallel programs},
  journal = j-SIGPLAN,
  volume = {28},
  number = {7},
  pages = {149--158},
  month = jul,
  year = {1993},
  CODEN = {SINODQ},
  ISSN = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  ISSN-L = {0362-1340},
  bibdate = {Sun Dec 22 10:17:40 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {NASA Ames Res. Center, Moffett Field, CA, USA},
  classification = {C4220 (Automata theory); C6110P (Parallel
    programming); C6140D (High level languages)},
  fjournal = {ACM SIG{\-}PLAN Notices},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  keywords = {Communication sets; Data-parallel languages;
    Distributed-memory implementations; Fast algorithms;
    Local memory access sequence; Multidimensional arrays;
    State machines},
  thesaurus = {Distributed memory systems; Finite automata; FORTRAN;
    Parallel programming},
}
@inproceedings{Colombet:1993:SMI,
  author = {L. Colombet and L. Desbat and F. Menard},
  title = {Star Modeling on {IBM RS6000} Networks Using {PVM}},
  crossref = {IEEE:1993:PIS},
  pages = {121--128},
  year = {1993},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {LMC-IMAG, Grenoble, France},
  classification = {C5220P (Parallel architecture); C5470 (Performance
    evaluation and testing)},
  corpsource = {LMC-IMAG, Grenoble, France},
  keywords = {architectures; evaluation; Heterogeneous networks;
    heterogeneous networks; heterogeneous parallel;
    Heterogeneous parallel architectures; IBM RS6000; IBM
    RS6000 networks; Monte Carlo methods; Monte Carlo
    radiative transfer code; networks; parallel; parallel
    architectures; Parallel performances; parallel virtual
    machine; Parallel virtual machine; performance;
    performances; PVM; star modelling; Star modelling},
  sponsororg = {IEEE; Washington State Univ.; NPAC at Syracuse Univ.;
    ACM; Washington Technol. Center},
  thesaurus = {Monte Carlo methods; Parallel architectures;
    Performance evaluation},
  treatment = {P Practical},
}
%%% Conference paper on parallelizing the OPTIM3D mesh optimization code
%%% with PVM.  The product name is braced as a single unit
%%% ({RS\slash 6000}) so that TeX swallows the space after \slash and the
%%% name typesets as RS/6000, as spelled in the keywords field.
@InProceedings{Coussement:1993:PMO,
author = "G. Coussement",
title = "Parallelization of a mesh optimization code on a
{RS\slash 6000} cluster",
crossref = "Anonymous:1993:PSE",
pages = "185--212",
year = "1993",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Aerodynamics Dept., Office Nat. d'Etudes et de
Recherches Aerospatiales, Chatillon, France",
classification = "C1180 (Optimisation techniques); C6110P (Parallel
programming)",
keywords = "Code structure; Communication protocol; IBM RS/6000;
Multi-domain structured mesh optimization code;
OPTIM3D; Parallelization effort; PVM; Three-dimensional
mesh optimization method",
thesaurus = "IBM computers; Optimisation; Parallel programming",
}
@article{Culler:1993:LTR,
  author = {David E. Culler and Richard M. Karp and David A.
    Patterson and Abhijit Sahay and Klaus E. Schauser and
    Eunice Santos and Ramesh Subramonian and Thorsten von
    Eicken},
  title = {{LogP}: towards a realistic model of parallel
    computation},
  journal = j-SIGPLAN,
  volume = {28},
  number = {7},
  pages = {1--12},
  month = jul,
  year = {1993},
  CODEN = {SINODQ},
  ISSN = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  ISSN-L = {0362-1340},
  bibdate = {Thu Dec 14 18:49:37 MST 1995},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Div. of Comput. Sci., California Univ., Berkeley, CA,
    USA},
  classification = {C5440 (Multiprocessor systems and techniques);
    C6110P (Parallel programming); C7430 (Computer
    engineering)},
  confdate = {19-22 May 1993},
  conflocation = {San Diego, CA, USA},
  confsponsor = {ACM},
  fjournal = {ACM SIG{\-}PLAN Notices},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  keywords = {CM-5; Communication bandwidth; Communication delay;
    Computing bandwidth; LogP; Machine configuration;
    Machine designers; Parallel computers; Parallel machine
    model; Portable parallel algorithms},
  thesaurus = {Parallel algorithms; Parallel machines; Parallel
    programming; Virtual machines},
}
@inproceedings{daCunha:1993:PLA,
  author = {R. D. da Cunha and T. Hopkins},
  title = {Porting linear algebra subroutines from transputers to
    clusters of workstations},
  crossref = {Grebe:1993:TAS},
  pages = {660--667},
  year = {1993},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Comput. Lab., Kent Univ., Canterbury, UK},
  classification = {C5220P (Parallel architecture); C5440
    (Multiprocessor systems and techniques); C6110B
    (Software engineering techniques); C6150N (Distributed
    systems); C7310 (Mathematics)},
  keywords = {Fortran77; Linear algebra subroutines; Message-passing
    system; Occam2; Parallel Virtual Machine; PVM;
    Subroutine porting; Transputers; Workstation clusters},
  thesaurus = {FORTRAN; Linear algebra; Mathematics computing;
    Message passing; Occam; Software portability;
    Subroutines; Transputer systems},
}
@article{Damodaran-Kamal:1993:NTD,
  author = {S. K. Damodaran-Kamal and J. M. Francioni},
  title = {Nondeterminacy: testing and debugging in message
    passing parallel programs},
  journal = j-SIGPLAN,
  volume = {28},
  number = {12},
  pages = {118--128},
  month = dec,
  year = {1993},
  CODEN = {SINODQ},
  ISSN = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  ISSN-L = {0362-1340},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Comput. Sci., Southwestern Louisiana Univ.,
    Lafayette, LA, USA},
  classification = {C6110P (Parallel programming); C6150G (Diagnostic,
    testing, debugging and evaluating systems); C6150N
    (Distributed systems)},
  fjournal = {ACM SIG{\-}PLAN Notices},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  keywords = {Debugging tool; Mdb; Message passing parallel
    programs; Nondeterminacy; Parallel program; Program
    errors; PVM programs; Testing tool},
  thesaurus = {Message passing; Parallel programming; Program
    debugging; Program testing},
}
@inproceedings{Despons:1993:CCP,
  author = {R. Despons and T. Muntean},
  title = {Constructing correct protocols for a diffusion virtual
    machine in message passing parallel architectures},
  crossref = {Grebe:1993:TAS},
  pages = {465--480},
  year = {1993},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification = {C5220P (Parallel architecture); C5440
    (Multiprocessor systems and techniques); C5640
    (Protocols); C6150N (Distributed systems); C7430
    (Computer engineering)},
  corpsource = {IMAG-LGI Lab., Grenoble Univ., France},
  keywords = {architectures; communication protocols; diffusion
    protocols; diffusion virtual machine; machines;
    massively parallel architectures; message passing;
    parallel; parallel algorithms; parallel applications;
    parallel architectures; parallel machines; programming
    environments; protocols; virtual machines},
  pubcountry = {Netherlands},
  treatment = {P Practical},
}
@inproceedings{Dongarra:1993:DSM,
  author = {J. J. Dongarra and R. Hempel and A. J. G. Hey and D.
    W. Walker},
  title = {A draft standard for message passing in a distributed
    memory environment},
  crossref = {Hoffmann:1993:PFE},
  pages = {465--481},
  year = {1993},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA},
  classification = {C5220P (Parallel architecture); C5440
    (Multiprocessor systems and techniques); C6150N
    (Distributed systems)},
  keywords = {C language; Data distribution transformations;
    Distributed memory environment; Draft standard; Fortran
    77; Library interface standard; Message passing;
    Message Passing Interface 1; Message selectivity;
    Message type; MPI1; Source process},
  thesaurus = {Distributed memory systems; Message passing},
}
@article{Dongarra:1993:IPF,
  author = {Jack Dongarra and G. A. Geist and Robert Manchek and
    V. S. Sunderam},
  title = {Integrated {PVM} Framework Supports Heterogeneous
    Network Computing},
  journal = j-COMPUT-PHYS,
  volume = {7},
  number = {2},
  pages = {166--174 (or 166--175??)},
  month = mar # "--" # apr,
  year = {1993},
  CODEN = {CPHYE2},
  ISSN = {0894-1866 (print), 1558-4208 (electronic)},
  ISSN-L = {0894-1866},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Tennessee Univ., Knoxville, TN, USA},
  classification = {C6110P (Parallel programming); C6150N (Distributed
    systems); C7300 (Natural sciences); C7430 (Computer
    engineering)},
  corpsource = {Tennessee Univ., Knoxville, TN, USA},
  fjournal = {Computers in Physics},
  keywords = {computer networks; computing; Concurrent applications;
    concurrent applications; distributed processing;
    Heterogeneous network computing; heterogeneous network
    computing; Integrated framework; integrated framework;
    natural sciences; parallel programming; Parallel
    virtual machine software; parallel virtual machine
    software; Scientific computations; scientific
    computations; virtual machines},
  thesaurus = {Computer networks; Distributed processing; Natural
    sciences computing; Parallel programming; Virtual
    machines},
  treatment = {G General Review; P Practical},
}
%%% ORNL technical report ORNL/TM-12231 proposing a user-level
%%% message-passing interface.  The third author's surname is Hey; the
%%% same author group appears with "A. J. G. Hey" in Dongarra:1993:DSM.
@TechReport{Dongarra:1993:PUM,
author = "J. Dongarra and R. Hempel and A. Hey and D. Walker",
title = "A Proposal for a User-Level Message Passing Interface
in a Distributed Memory Environment",
type = "Technical Report",
number = "ORNL/TM-12231",
institution = inst-ORNL,
address = inst-ORNL:adr,
month = feb,
year = "1993",
bibsource = "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/par.lin.alg.bib;
http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
Parallel/par.lin.alg.bib",
}
@inproceedings{Dongarra:1993:UPR,
  author = {J. J. Dongarra and A. Geist and R. Manchek and W.
    Jiang},
  title = {Using {PVM} 3.0 to Run Grand Challenge Applications on
    a Heterogeneous Network of Parallel Computers},
  crossref = {Sincovec:1993:SCP},
  pages = {873--877},
  year = {1993},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Ewing:1993:DCW,
  author = {R. E. Ewing and D. Mitchum and P. O'Leary and R. C.
    Sharpley and J. S. Sochacki},
  title = {Distributed Computation of Wave Propagation Models
    Using {PVM}},
  crossref = {IEEE:1993:PSP},
  pages = {22--31},
  year = {1993},
  bibdate = {Wed Apr 15 12:04:03 MDT 1998},
  bibsource = {Compendex database;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Texas A\&M Univ},
  affiliationaddress = {College Station, TX, USA},
  classification = {484.1; 723; 921; C5440 (Multiprocessing systems);
    C6110P (Parallel programming); C7340 (Geophysics
    computing); C7430 (Computer engineering)},
  corpsource = {Inst. for Sci. Comput., Texas A and M Univ., College
    Station, TX, USA},
  keywords = {Computer simulation; Computer workstations;
    distributed computation; distributed memory systems;
    Earth; geophysics computing; handling large-scale
    problems; IBM RS/6000s; Large-earth models; large-scale
    computations; Large-scale problems; nodes; numerical
    approximation; parallel processing; parallel processing
    environment; Parallel processing environment; Parallel
    processing systems; Parallel Virtual Machine; Parallel
    virtual machine (PVM); PVM; Seismic wave propagation;
    seismic waves; Seismic waves; supercomputers; timings;
    virtual machines; visualization; wave propagation; Wave
    propagation; wave propagation models; Wave propagation
    models; workstations},
  sponsororg = {IEEE; ACM SIGARCH},
  treatment = {P Practical},
}
@inproceedings{Fritscher:1993:PDC,
  author = {J. F. Fritscher and F. Sukup},
  title = {{93SC038} Parallel Distributed Computing Using {PVM}},
  crossref = {Anonymous:1993:ATA},
  pages = {221--228},
  year = {1993},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Geist:1993:EPC,
  author = {G. A. Geist and V. S. Sunderam},
  title = {The evolution of the {PVM} concurrent computing
    system},
  crossref = {IEEE:1993:DPC},
  pages = {549--557},
  year = {1993},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Oak Ridge Nat. Lab., TN, USA},
  classification = {C5440 (Multiprocessor systems and techniques);
    C6110P (Parallel programming); C7430 (Computer
    engineering)},
  corpsource = {Oak Ridge Nat. Lab., TN, USA},
  keywords = {future; Future trends; high-performance computations;
    High-performance computations; historical evolution;
    Historical evolution; networked environments; Networked
    environments; ongoing research projects; Ongoing
    research projects; parallel; parallel machines;
    parallel programming; Parallel programming; Parallel
    Virtual Machine; programming; programming model;
    Programming model; PVM concurrent computing system;
    scientific; Scientific supercomputing; software
    infrastructure; Software infrastructure;
    supercomputing; trends; virtual machines},
  thesaurus = {Parallel machines; Parallel programming; Virtual
    machines},
  treatment = {P Practical},
}
@inproceedings{Geist:1993:ILP,
  author = {G. A. Geist},
  title = {Invited Lecture: {PVM} 3 Beyond Network Computing},
  crossref = {Volkert:1993:PCS},
  pages = {194--203},
  year = {1993},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Geist:1993:PBN,
  author = {G. A. Geist},
  title = {{PVM} 3 beyond network computing},
  crossref = {Volkert:1993:PCS},
  pages = {194--203},
  year = {1993},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Oak Ridge Nat. Lab., TN, USA},
  classification = {C6115 (Programming support); C6150N (Distributed
    systems); C7430 (Computer engineering)},
  corpsource = {Oak Ridge Nat. Lab., TN, USA},
  keywords = {distributed computing; Distributed computing;
    distributed memory computer; Distributed memory
    computer; distributed memory systems; heterogeneous
    network computing; Heterogeneous network computing;
    machines; message-; Message-passing constructs;
    parallel machines; Parallel Virtual Machine; passing
    constructs; PVM 3; software package; Software package;
    virtual},
  pubcountry = {Germany},
  thesaurus = {Distributed memory systems; Parallel machines; Virtual
    machines},
  treatment = {P Practical},
}
@inproceedings{Geist:1993:PTW,
  author = {A. Geist and J. Dongarra and A. Beguelin and B.
    Manchek and Weicheng Jiang},
  title = {{PVM} takes over the world},
  crossref = {IEEE:1993:PSP},
  pages = {618--618},
  year = {1993},
  DOI = {https://doi.org/10.1109/SUPERC.1993.1263513},
  bibdate = {Fri May 27 10:20:49 2005},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Glendinning:1993:MMP,
  author = {I. Glendinning},
  title = {{93SC041} The {MPI} Message Passing Interface},
  crossref = {Anonymous:1993:ATA},
  pages = {229--236},
  year = {1993},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Hariri:1993:MPI,
  author = {S. Hariri and J. B. Park and F.-K. Yu and M. Parashar
    and G. C. Fox},
  title = {A message passing interface for parallel and
    distributed computing},
  crossref = {IEEE:1993:PIS},
  pages = {84--91},
  year = {1993},
  bibdate = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification = {B6150M (Protocols); C5640 (Protocols); C5670
    (Network performance)},
  corpsource = {Northeast Parallel Archit. Center, Syracuse Univ., NY,
    USA},
  keywords = {architectural support; communication protocol;
    distributed computing; distributed processing; gigabit
    networks; message passing; message passing interface;
    parallel computing; parallel processing; performance
    evaluation; protocols; supercomputing capabilities},
  sponsororg = {IEEE; Washington State Univ.; NPAC at Syracuse Univ.;
    ACM; Washington Technol. Center},
  treatment = {P Practical},
}
@inproceedings{Hartley:1993:CPS,
  author = {C. L. Hartley and V. S. Sunderam},
  title = {Concurrent programming with shared objects in
    networked environments},
  crossref = {IEEE:1993:PSI},
  pages = {471--478},
  year = {1993},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
    GA, USA},
  classification = {C6110J (Object-oriented programming); C6110P
    (Parallel programming); C6115 (Programming support);
    C6150N (Distributed systems)},
  keywords = {Application development; Concurrent programming;
    Distributed computing; Ease of use; Message passing;
    Networked computing platforms; Networked environments;
    Object-oriented techniques; Partitioning; Portable
    software systems; PVM distributed computing system;
    Scheduling; Shared objects; Shared-object concurrent
    computation; Synchronization; Toolkit},
  thesaurus = {Multiprocessing programs; Object-oriented programming;
    Parallel programming; Software tools},
}
%%% Conference paper on a coarse-grained PVM code for reactive-flow
%%% simulation.  The machine name is braced as a single unit
%%% ({IBM RS\slash 6000}) so that TeX swallows the space after \slash and
%%% the name typesets as IBM RS/6000, as spelled in the keywords field.
@InProceedings{Hebeker:1993:CPC,
author = "F.-K. Hebeker",
title = "On a coarse-grained parallel code to simulate reactive
flows on an {IBM RS\slash 6000} workstation-cluster",
crossref = "Brebbia:1993:ASE",
pages = "253--262",
year = "1993",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "IBM Heidelberg Sci. Center, Germany",
classification = "A4710 (General fluid dynamics theory, simulation and
other computational methods); A4770F (Chemically
reactive flows); C6110P (Parallel programming); C7440
(Civil and mechanical engineering computing); C7450
(Chemical engineering computing)",
keywords = "Algorithmic development; Chemical source terms;
Coarse-grained parallel code; Compressible flow;
Compressible Navier--Stokes equations; Domain splitting
techniques; Engineering-mathematical modelling; Global
exothermic reaction chemistry; IBM RS/6000 workstation
cluster; Internal combustion engines; Knock damage;
Message passing; Numerical simulation; Optimally
adapted code; Performance measurements; PVM programming
environment; Reactive flow simulation; Semi-implicit
treatment; Shock-capturing finite-volume scheme",
thesaurus = "Chemical engineering computing; Chemically reactive
flow; Digital simulation; Distributed algorithms; Flow
simulation; IBM computers; Internal combustion engines;
Mechanical engineering computing; Message passing;
Parallel programming",
}
@article{Jesshope:1993:LRV,
  author = {C. Jesshope},
  title = {Latency reduction in {VLSI} routers},
  journal = j-PARALLEL-PROCESS-LETT,
  volume = {3},
  number = {4},
  pages = {485--494},
  month = dec,
  year = {1993},
  CODEN = {PPLTEE},
  ISSN = {0129-6264 (print), 1793-642X (electronic)},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Electron. and Electr. Eng., Surrey Univ.,
    Guildford, UK},
  classification = {C5220P (Parallel architecture); C5470 (Performance
    evaluation and testing)},
  fjournal = {Parallel Processing Letters},
  journal-URL = {http://www.worldscientific.com/loi/ppl},
  keywords = {Latency reduction; MPI router chip; Parallel
    computers; Scalable performance; VLSI routers},
  pubcountry = {Singapore},
  thesaurus = {Fault tolerant computing; Parallel architectures;
    Performance evaluation; VLSI},
}
@inproceedings{Jesshope:1993:MCA,
  author = {C. Jesshope},
  title = {The {MPI} Chip and its Applications},
  crossref = {Anonymous:1993:JFI},
  pages = {47--54},
  year = {1993},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@mastersthesis{Katamneni:1993:PPE,
  author = {Sreevenu Katamneni},
  title = {Parallel processing extensions to {Verilog HDL} using
    the {PVM} environment},
  type = {M.S.E.E. thesis},
  school = inst-UAL-EE,
  address = inst-UAL-EE:adr,
  pages = {viii + 108},
  year = {1993},
  bibdate = {Mon Jan 15 18:16:30 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  keywords = {Computer hardware description languages.; Parallel
    processing (Electronic computers); Verilog (Computer
    hardware description language); Virtual computer
    systems.},
}
%%% Japanese-language article on parallelization-assistance systems.
%%% The keyword FORGE90 is spelled with the digit zero, not the letter O.
@Article{Kikuchi:1993:PAS,
author = "S. Kikuchi",
title = "Parallelization assist system",
journal = j-JOHO-SHORI,
volume = "34",
number = "9",
pages = "1158--1169",
month = sep,
year = "1993",
CODEN = "JOSHA4",
ISSN = "0447-8053",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Central Res. Lab., Hitachi Ltd, Tokyo, Japan",
classification = "C6110P (Parallel programming); C6115 (Programming
support)",
fjournal = "Joho-Shori (J. Information Processing Soc. Japan)",
keywords = "ASPAR; Express; Flow-insensitive systems;
Flow-sensitive systems; FORGE90; KAP; Message passing
interface standards; MIMDizer; ParaGraph; Parallel
Fortran Converter; Parallelization assistance system;
ParaScope Editor; Parassist; Perfect club benchmarks;
PIE; Portable Instrumented Communication Library;
Profiling tools; PTOOL; SUPERB; SUPRENUM FORTRAN;
Transformations; VAST",
language = "Japanese",
pubcountry = "Japan",
thesaurus = "FORTRAN; Parallel programming; Reduced instruction set
computing; Software tools",
}
@article{Kranz:1993:IMP,
  author = {David Kranz and Kirk L. Johnson and Anant Agarwal and
    John Kubiatowicz and Beng-Hong Lim},
  title = {Integrating message-passing and shared-memory: early
    experience},
  journal = j-SIGPLAN,
  volume = {28},
  number = {7},
  pages = {54--63},
  month = jul,
  year = {1993},
  CODEN = {SINODQ},
  ISSN = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  ISSN-L = {0362-1340},
  bibdate = {Thu Dec 14 18:49:37 MST 1995},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract = {A discussion is given on some of the issues involved
    in implementing a shared-address space programming
    model on large-scale, distributed-memory
    multiprocessors. While such a programming model can be
    implemented on both shared-memory and message-passing
    architectures, the authors argue that the transparent,
    coherent caching of global data provided by many
    shared-memory architectures is of crucial importance.
    Because message-passing mechanisms are much more
    efficient than shared-memory loads and stores for
    certain types of interprocessor communication and
    synchronization operations, however, the authors argue
    for building multiprocessors that efficiently support
    both shared-memory and message-passing mechanisms. The
    authors describe an architecture, Alewife, that
    integrates support for shared-memory and
    message-passing through a simple interface; they expect
    the compiler and runtime system to cooperate in using
    appropriate hardware mechanisms that are most efficient
    for specific operations. They report on both integrated
    and exclusively shared-memory implementations of the
    runtime system and two applications.},
  acknowledgement = ack-nhfb,
  affiliation = {Lab. for Comput. Sci., MIT, Cambridge, MA, USA},
  classification = {C5440 (Multiprocessor systems and techniques);
    C6110P (Parallel programming)},
  confdate = {19-22 May 1993},
  conflocation = {San Diego, CA, USA},
  confsponsor = {ACM},
  fjournal = {ACM SIG{\-}PLAN Notices},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  keywords = {Alewife; Coherent caching; Compiler;
    Distributed-memory multiprocessors; Exclusively
    shared-memory implementations; Global data; Hardware
    mechanisms; Interprocessor communication;
    Message-passing architectures; Message-passing
    mechanisms; Runtime system; Shared-address space
    programming model; Shared-memory architectures;
    Shared-memory loads; Synchronization operations},
  thesaurus = {Message passing; Parallel programming; Shared memory
    systems},
}
%%% Technical report CMU-CS-93-124 on Fail-safe PVM.  Leon:1993:FPP
%%% carries the same title and report number; institution and address
%%% use the same inst-SCS-CMU macros as that entry so the issuing body
%%% is spelled one way in both.
@TechReport{Leon:1993:FPA,
author = "J. Leon and A. L. Fisher and P. Steenkiste",
title = "Fail-safe {PVM}: a portable package for distributed
programming with transparent recovery",
number = "CMU-CS-93-124",
institution = inst-SCS-CMU,
address = inst-SCS-CMU:adr,
year = "1993",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
Techreports/tr.misc.bib",
}
%%% Technical report CMU-CS-93-124 on Fail-safe PVM; Leon:1993:FPA
%%% carries the same title and report number.  The report number is
%%% taken from the series field and the month from the annote field.
%%% The quoted abstract is delimited by ``...''; the funding sentence
%%% after the closing quote is not part of the quotation.
@TechReport{Leon:1993:FPP,
author = "Juan Leon and Allan L. Fisher and Peter Alfons
Steenkiste",
title = "Fail-safe {PVM}: a portable package for distributed
programming with transparent recovery",
number = "CMU-CS-93-124",
institution = inst-SCS-CMU,
address = inst-SCS-CMU:adr,
pages = "22",
month = feb,
year = "1993",
bibdate = "Mon Jan 15 15:32:53 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "[Research paper] / Carnegie Mellon University. School
of Computer Science; CMU-CS-93-124 Research paper
(Carnegie Mellon University. School of Computer
Science); CMU-CS-93-124",
abstract = "``Many scientific problems benefit from computations
that are parallel at a coarse grain. Collections of
loosely-coupled, heterogeneous computers are
increasingly being applied to these problems. While
individual computers are designed to be relatively
reliable, a collection of several autonomous machines
necessarily has a greater rate of failure. As data
networks improve, and larger multicomputers are being
used, rates of failure will increase. PVM (Parallel
Virtual Machine) [Sun90, GS92] is a popular software
framework that facilitates message-passing network
programming. We present enhancements to PVM to mask
fail-stop, single-node failures from the application.
Fail-safe PVM uses checkpoint and rollback to recover
from such failures. Both checkpoints and rollbacks are
transparent to the application if the application does
not depend on real-time events. Recovery occurs without
wait for repair of the failed computer. The system does
not rely on shared stable storage and does not require
modifications to the operating system. We describe the
design and implementation of fail-safe PVM, present
meassurements [sic] of checkpoint costs, and briefly
discuss shortcomings and potential avenues for
improvement.'' Supported in part by the Defense
Advanced Research Projects Agency, issued by
DARPA/CMO.",
acknowledgement = ack-nhfb,
annote = "February 1993.",
keywords = "Fault-tolerant computing",
}
@inproceedings{Levesque:1993:SAA,
  author = {J. M. Levesque and R. Friedman},
  title = {The state of the art in automatic parallelisation},
  crossref = {Anonymous:1993:SEC},
  pages = {95--107},
  year = {1993},
  bibdate = {Sun Dec 22 10:17:40 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Appl. Parallel Res. Inc., Placerville, CA, USA},
  classification = {C6110P (Parallel programming)},
  keywords = {Automatic parallelisation; Data Distribution
    Directives; Distributed memory; Fortran programs;
    Parallelization; Shared memory; User assistance},
  thesaurus = {FORTRAN; Parallel programming},
}
@inproceedings{Lewis:1993:PCP,
  author = {M. J. Lewis and R. E. {Cline, Jr.}},
  title = {{PVM} Communication Performance in a Switched {FDDI}
    Heterogeneous Distributed Computing Environment},
  crossref = {Bhargava:1993:PIW},
  pages = {13--19},
  year = {1993},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Sandia Nat. Labs., Livermore, CA, USA},
  classification = {C5440 (Multiprocessing systems); C5470 (Performance
    evaluation and testing); C5620L (Local area networks);
    C5640 (Protocols)},
  corpsource = {Sandia Nat. Labs., Livermore, CA, USA},
  keywords = {distributed processing; distributed systems;
    Distributed systems; FDDI; heterogeneous distributed
    computing; Heterogeneous distributed computing; local
    area networks; machines; message; message passing
    system; Message passing system; parallel; parallel
    machines; Parallel programs; Parallel Virtual Machine;
    passing; performance evaluation; programs; PVM;
    switched FDDI; Switched FDDI; virtual},
  sponsororg = {IEEE},
  thesaurus = {Distributed processing; FDDI; Local area networks;
    Message passing; Parallel machines; Performance
    evaluation; Virtual machines},
  treatment = {P Practical},
}
@inproceedings{Li:1993:MSU,
  author = {Q. Li and T. G. Yip},
  title = {Monitoring Systems Using {PVM}},
  crossref = {Law:1993:EDM},
  pages = {781--785},
  year = {1993},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Li:1993:SLL,
  author = {Q. Li and J.-C. Liu and T. G. Yip},
  title = {Solving Large Linear Equations Using {PVM} System},
  crossref = {Law:1993:EDM},
  pages = {685--690},
  year = {1993},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Loyot:1993:VVM,
  author = {E. C. {Loyot, Jr.} and A. S. Grimshaw},
  title = {{VMPP}: a virtual machine for parallel processing},
  crossref = {IEEE:1993:PSI},
  pages = {735--740},
  year = {1993},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification = {C6110P (Parallel programming); C6150C (Compilers,
    interpreters and other processors); C7430 (Computer
    engineering)},
  corpsource = {Dept. of Comput. Sci., Virginia Univ.,
    Charlottesville, VA, USA},
  keywords = {front-end translators; parallel languages; parallel
    processing; parallel source languages; portability;
    program interpreters; software; virtual machine;
    virtual machines; VMPP},
  sponsororg = {IEEE Comput. Soc.; ACM Sigarch},
  treatment = {P Practical},
}
@inproceedings{Maly:1993:DCP,
  author = {K. Maly and M. Zubair and S. Kelbar},
  title = {Distributed computing with parallel networking},
  crossref = {IEEE:1993:PFW},
  pages = {375--379},
  year = {1993},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Comput. Sci. Dept., Old Dominion Univ., Norfolk, VA,
    USA},
  classification = {B6150M (Protocols); B6210L (Computer
    communications); C5620L (Local area networks); C5640
    (Protocols); C5670 (Network performance)},
  keywords = {Application performance; Communication network;
    Communication network performance; Dedicated parallel
    machine; Distributed computing; Ethernet; Parallel
    networking; Parallel virtual machine environment; PPVM;
    PVM; Round robin scheduling},
  thesaurus = {Local area networks; Performance evaluation;
    Protocols; Scheduling},
}
@Article{Matrone:1993:LPC,
author = "A. Matrone and P. Schiano and V. Puoti",
title = "{LINDA} and {PVM}: a comparison between two
environments for parallel programming",
journal = j-PARALLEL-COMPUTING,
volume = "19",
number = "8",
pages = "949--957",
month = aug,
year = "1993",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "Short communication.",
acknowledgement = ack-nhfb,
affiliation = "Centro Italiano Ricerche Aerospaziali, Capua, Italy",
classification = "C6110P (Parallel programming); C6115 (Programming
support)",
corpsource = "Centro Italiano Ricerche Aerospaziali, Capua, Italy",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
keywords = "Distributed memory machines; distributed memory
machines; environments; LINDA; Message passing; message
passing; Parallel programming; parallel programming;
programming; Programming environments; programming
environments; PVM; RISC/6000",
pubcountry = "Netherlands",
thesaurus = "Parallel programming; Programming environments",
treatment = "P Practical",
}
@Article{McKinney:1993:MMI,
author = "G. W. McKinney and J. T. West",
title = "Multiprocessing {MCNP} on an {IBM RS\slash 6000}
cluster",
journal = j-TRANS-AM-NUCL-SOC,
volume = "68",
number = "pt.A",
pages = "212--214",
year = "1993",
CODEN = "TANSAO",
ISSN = "0003-018X",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Los Alamos Nat. Lab., NM, USA",
classification = "A0250 (Probability theory, stochastic processes, and
statistics); A0270 (Computational techniques); A0560
(Transport processes: theory); A2820H (Neutron
diffusion); A2841C (Computer codes); C7320 (Physics and
Chemistry); C7470 (Nuclear engineering)",
fjournal = "Transactions of the American Nuclear Society",
keywords = "Electron transport; IBM RS/6000 cluster; MCNP; Monte
Carlo; Multiuser environment; Neutron transport;
Parallel Virtual Machine; Photon transport; PVM
version; Reduced Instruction Set Computer; Workstation
cluster",
thesaurus = "Monte Carlo methods; Neutron transport theory; Nuclear
engineering computing; Photon transport theory; Physics
computing; Transport processes",
}
@Article{Michielse:1993:PMU,
author = "P. Michielse",
title = "Parallel multigrid using {PVM}",
journal = j-SUPERCOMPUTER,
volume = "10",
number = "6",
pages = "10--23",
month = "????",
year = "1993",
CODEN = "SPCOEL",
ISSN = "0168-7875",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Convex Computer, Utrecht, Netherlands",
classification = "C4170 (Differential equations); C4240P (Parallel
programming and algorithm theory); C5440
(Multiprocessor systems and techniques)",
corpsource = "Convex Computer, Utrecht, Netherlands",
fjournal = "Supercomputer",
keywords = "algorithms; Convex; Convex MetaSeries machines;
differential equations; distributed memory systems;
Distributed memory systems; MetaSeries machines;
Parallel; parallel; parallel machines; parallel
multigrid method; Parallel multigrid method; Parallel
Virtual Machine; PVM; shared memory systems; Shared
memory systems; Virtual Machine; virtual machines",
pubcountry = "Netherlands",
thesaurus = "Differential equations; Distributed memory systems;
Parallel algorithms; Parallel machines; Shared memory
systems; Virtual machines",
treatment = "P Practical",
}
@Article{Nanayakkara:1993:PIR,
author = "A. Nanayakkara and D. Moncrieff and S. Wilson",
title = "Performance of {IBM RISC System\slash 6000}
workstation clusters in a quantum chemical
application",
journal = j-PARALLEL-COMPUTING,
volume = "19",
number = "9",
pages = "1053--1062",
month = sep,
year = "1993",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Supercomputer Comput. Res. Inst., Florida State Univ.,
Tallahassee, FL, USA",
classification = "C5430 (Microcomputers); C5470 (Performance
evaluation and testing); C7320 (Physics and
Chemistry)",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
keywords = "Concurrent computation many-body perturbation theory;
CRAY Y-MP C-90; Electron correlation energy
calculations; IBM RISC System/6000 workstation
clusters; NEC SX-3/44 computers; Parallel virtual
machine system; Performance",
pubcountry = "Netherlands",
thesaurus = "Chemistry computing; IBM computers; Performance
evaluation; Quantum chemistry; Reduced instruction set
computing; Workstations",
}
@Article{Nelson:1993:PPP,
author = "M. L. Nelson",
title = "{PVM} provides power in the public domain",
journal = j-PARALLELOGRAM,
volume = "53",
pages = "20--21",
month = may # "--" # jun,
year = "1993",
CODEN = "PRALEH",
ISSN = "0953-7252",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5440 (Multiprocessor systems and techniques);
C6150N (Distributed systems); C7430 (Computer
engineering)",
fjournal = "Parallelogram",
keywords = "de; De facto standard; distributed computing;
Distributed computing; ease-of-use; Ease-of-use; facto
standard; heterogeneous computer network; Heterogeneous
computer network; maintenance; Maintenance; message
passing; message-passing system; Message-passing
system; parallel machines; parallel programming;
Parallel virtual machine; performance; Performance;
public domain software; Public domain software; PVM;
robustness; Robustness; software packages; virtual
machines",
pubcountry = "UK",
thesaurus = "Message passing; Parallel machines; Parallel
programming; Public domain software; Software packages;
Virtual machines",
treatment = "P Practical; R Product Review",
}
@TechReport{Oed:1993:CRM,
author = "Wilfried Oed",
title = "The {Cray Research} Massively Parallel Processor
System {CRAY T3D}",
institution = "Cray Research GmbH",
address = "M{\"u}nchen, Germany",
month = nov # " 15",
year = "1993",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
Parallel/Parallel.io.bib",
comment = "A MIMD, shared-memory machine, with 2-processor units
embedded in a 3-d torus. Each link is bidirectional and
runs 300 MB/s. Processors are 150 MHz ALPHA, plus
16--64 MB RAM, plus a memory interface unit. Global
physical address space with remote-reference and
block-transfer capability. Not clear about cache
coherency. Separate tree network for global
synchronization. Support for message send and optional
interrupt. I/O is all done through interface nodes that
hook to the YMP host and to its I/O clusters with 400
MB/s links. I/O is by default serialized, but they do
support a ``broadcast'' read operation (but see
pase:t3d-fortran). FORTRAN compiler supports the NUMA
shared memory; PVM is used for C and message passing.",
keyword = "parallel architecture, shared memory, supercomputer,
parallel I/O, pario bib",
}
@Article{Otto:1993:PAC,
author = "S. W. Otto",
title = "Parallel array classes and lightweight sharing
mechanisms",
journal = j-SCI-PROG,
volume = "2",
number = "4",
pages = "203--216",
month = "Winter",
year = "1993",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst.
of Sci. and Technol., Beaverton, OR, USA",
classification = "C5220P (Parallel architecture); C5440
(Multiprocessing systems); C6110J (Object-oriented
programming); C6110P (Parallel programming); C6120
(File organisation); C6150N (Distributed systems
software)",
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
keywords = "C++; Collective object; Distributed memory;
Distributed memory architectures; Finite difference
stencils; Finite element method; Intel NX message
passing systems; Interpolation/contraction operations;
Lightweight sharing mechanisms; Low level message
passing; Meaningful array operations; MetaMP; Multigrid
algorithms; Parallel array classes; Particle in cell
algorithms; Partitioned array; PVM; Shared memory
architectures; Transparent guard strips; Weak memory
coherence",
thesaurus = "Abstract data types; Distributed memory systems;
Message passing; Object-oriented programming; Parallel
programming",
}
@InProceedings{Parsons:1993:EDC,
author = "I. Parsons",
title = "Evaluation of distributed communication systems",
crossref = "Gawman:1993:PCT",
pages = "956--970 vol.2",
year = "1993",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Alberta Univ., Edmonton, Alta.,
Canada",
classification = "C0310H (Equipment and software evaluation methods);
C6110P (Parallel programming); C6115 (Programming
support); C6150G (Diagnostic, testing, debugging and
evaluating systems); C6150N (Distributed systems
software)",
keywords = "Balancing act; Communication systems; Concert/C;
Distributed communication systems; Distributed parallel
programs; Enterprise project; Handcrafted code; ISIS;
Network of workstations; NMP; Programming environment;
PVM; Software engineers",
thesaurus = "Network operating systems; Parallel programming;
Program testing; Programming environments; Software
selection",
}
@MastersThesis{Patterson:1993:PPE,
author = "Christopher S. Patterson",
title = "Parametric Positron Emission Tomographic imaging using
Parallel Virtual Machine: with an example using
Myocardial Blood Flow analysis",
type = "M.S. thesis",
school = inst-UTK,
address = inst-UTK:adr,
pages = "x + 132",
year = "1993",
bibdate = "Mon Jan 15 15:32:53 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
keywords = "Blood flow --- Measurement.; Tomography, Emission.;
Virtual computer systems",
}
@InProceedings{Rabenseifner:1993:CDR,
author = "R. Rabenseifner and A. Schuch",
title = "Comparison of {DCE RPC}, {DFN-RPC}, {ONC} and {PVM}",
crossref = "Schill:1993:DOD",
pages = "39--46",
year = "1993",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Rechenzentrum, Stuttgart Univ., Germany",
classification = "C5670 (Network performance); C6115 (Programming
support); C6150N (Distributed systems software); C6150N
(Distributed systems)",
conflocation = "Karlsruhe, Germany; 7-8 Oct. 1993",
conftitle = "International DCE Workshop. DCE --- The OSF
Distributed Computing Environment Client/Server Model
and Beyond",
corpsource = "Rechenzentrum, Stuttgart Univ., Germany",
keywords = "account; applications; C; C applications; calls;
Capability; capability; Computer server; computer
server; DCE RPC; DFN-RPC; Early Participation; Early
Participation Program; FORTRAN; Fortran applications
distribution; Functionality; functionality; German
Research Network; German Research Network Society; IBM
computers; IBM RS/6000 workstations; message passing;
Message passing library; message passing library;
network servers; ONC; open systems; OSF Distributed
Computing Environment; Parallelization;
parallelization; Performance; performance; performance
evaluation; Program; PVM; remote procedure; Remote
procedure calls; scientific-technical;
Scientific-technical applications; Society; software
tools; SUN RPC; System programming tool; system
programming tool; systems analysis; Unix; UNIX computer
network; user-; User-account; workstations",
pubcountry = "Germany",
thesaurus = "FORTRAN; IBM computers; Message passing; Network
servers; Open systems; Performance evaluation; Remote
procedure calls; Software tools; Systems analysis;
Unix; Workstations",
treatment = "P Practical",
}
@Article{Robinson:1993:ECD,
author = "D. F. Robinson and D. Judd and P. K. McKinley and B.
H. C. Cheng",
title = "Efficient collective data distribution in all-port
wormhole-routed hypercubes",
journal = j-PROC-SUPERCOMPUT,
pages = "792--801",
month = "????",
year = "1993",
CODEN = "????",
ISBN = "0-8186-4340-4",
ISBN-13 = "978-0-8186-4340-8",
ISSN = "1063-9535",
LCCN = "QA76.5 .S894 1993",
bibdate = "Fri May 24 09:57:40 MDT 1996",
bibsource = "Compendex database;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "This paper addresses the problem of collective data
distribution, specifically multicast, in
wormhole-routed hypercubes. The system model allows a
processor to send and receive data in all dimensions
simultaneously. New theoretical results that
characterize contention among messages in
wormhole-routed hypercubes are developed and used to
design new multicast routing algorithms. The algorithms
are compared in terms of the number of steps required
in each, their measured execution times when
implemented on a relatively small-scale nCUBE-2, and
their simulated execution times on larger hypercubes.",
acknowledgement = ack-nhfb,
affiliation = "Michigan State Univ",
affiliationaddress = "East Lansing, MI, USA",
classification = "723",
conference = "Proceedings of the Supercomputing '93 Conference",
conferenceyear = "1993",
fjournal = "Proceedings of the Supercomputing Conference",
journalabr = "Proc Supercomputing Conf",
keywords = "Algorithms; Message passing interface (MPI); Multicast
routing algorithms; Parallel processing systems;
Small-scale nCUBE-2; Wormhole-routed hypercubes",
meetingaddress = "Portland, OR, USA",
meetingdate = "Nov 15--19 1993",
meetingdate2 = "11/15--19/93",
publisherinfo = "Computer Society Press",
sponsor = "IEEE Computer Society; ACM SIGARCH",
}
@MastersThesis{Sept:1993:DIP,
author = "Doug Sept",
title = "The design, implementation and performance of a queue
manager for {PVM}",
type = "M.S. thesis",
school = "Computer Science Department, " # inst-UTK,
address = inst-UTK:adr,
pages = "viii + 45",
year = "1993",
bibdate = "Mon Jan 15 18:16:36 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Technical report CS-93-196: University of Tennessee,
Knoxville, Computer Science Department",
abstract = "The PVM Queue Manager (QM) application addresses some
of the load balancing problems associated with the
heterogeneous, multi-user, computing environments for
which PVM was designed. In such environments, PVM is
not only confronted with the difficulties of
distributing tasks among machines of variable loads, it
must also contend with machines of varying performance
levels in the same virtual machine. The QM addresses
both of these problems using two different load
balancing techniques, one static, the other dynamic. In
its simplest (static) mode, the QM will initiate PVM
processes for the user on demand, taking into account
information such as the peak megaflops/sec and actual
load of each machine. In addition to the initiation of
processes, the QM will also accept tasks to be
completed by a specified PVM process type. These tasks
are shipped to the QM where they are kept in a FIFO
queue. Worker processes in the virtual machine send
idle messages to the QM when they are ready for a task,
and the QM ships a task to the process if there is one
(of a type matching the process) in the queue. The QM
also maintains a list of idle processes and chooses the
best one for the task, should one arrive when several
processes are idle. Since faster machines typically
send more idle messages (and receive more tasks) than
slower ones, this provides a level of dynamic load
balancing for the system. Three applications have
already been implemented using the QM within PVM: a
Mandelbrot image generator, a conjugate-gradient
algorithm, and a map analysis program used in landscape
ecology applications. Benchmarks of elapsed wall-clock
time comparing standard PVM versions with the QM-based
versions demonstrate substantial performance gains for
both methods of load balancing. When processing a 1000
x 1000 image, for example, the QM-based Mandelbrot
application averaged 63.92 seconds, compared to 139.62
seconds for the standard PVM version in a heterogeneous
[sic] network of five workstations (comprised of Sun4's
and IBM RS/6000).",
acknowledgement = ack-nhfb,
keywords = "Parallel computers.; Queuing theory; Virtual computer
systems",
}
@InProceedings{Simonsen:1993:DMD,
author = "H. H. Simonsen and J. Amundsen",
title = "Distributed Molecular Dynamics Using the {PVM}
System",
crossref = "Sincovec:1993:SCP",
pages = "183--186",
year = "1993",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Skjellum:1993:SLH,
author = "A. Skjellum",
title = "Scalable libraries in a heterogeneous environment",
crossref = "IEEE:1993:PIS",
pages = "13--20",
year = "1993",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Mississippi State Univ., MS,
USA",
classification = "C5440 (Multiprocessor systems and techniques); C6155
(Computer communications software)",
keywords = "Communicating processes; Communication contexts;
Heterogeneous environment; Heterogeneous network
environment; Message-passing features; MPI standard;
Multicomputer libraries; Multicomputer toolbox
first-generation scalable libraries; Scalable
libraries; User program; Zipcode",
thesaurus = "Computer communications software; Message passing;
Multiprocessing systems",
}
@Article{Smith:1993:DSI,
author = "S. L. Smith",
title = "Dynamic scheduling of irregularly structured parallel
computations in heterogeneous distributed systems",
journal = j-SIGPLAN,
volume = "28",
number = "1",
pages = "86",
month = jan,
year = "1993",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "CERFACS, Toulouse, France",
classification = "C6150N (Distributed systems)",
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "Dynamic centralized scheduling; Global optimization;
Heterogeneous computing environments; Heterogeneous
distributed systems; Irregularly structured parallel
computations; Parallel algorithm; Parallel virtual
machine; Performance evaluation; PVM environment;
Simulation",
thesaurus = "Distributed processing; Parallel programming;
Scheduling",
}
@InProceedings{Smith:1993:MBA,
author = "K. A. Smith",
title = "Multi-Processor Based Accident Using {PVM}",
crossref = "Sincovec:1993:SCP",
pages = "262--265",
year = "1993",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Sochacki:1993:DCW,
author = "J. S. Sochacki and D. Mitchum and P. O'Leary and R. E.
Ewing",
title = "Distributed Computation of Wave Propagation Models
Using {PVM}",
crossref = "IEEE:1993:PSP",
pages = "22--33",
year = "1993",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Sunderam:1993:PCC,
author = "V. Sunderam",
title = "The {PVM} Concurrent Computing System",
crossref = "Anonymous:1993:CDP",
pages = "20--84",
year = "1993",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{vanderPas:1993:PIG,
author = "R. {van der Pas}",
title = "The {PVM} implementation of a {Generalized Red Black}
algorithm",
journal = j-SUPERCOMPUTER,
volume = "10",
number = "4--5",
pages = "72--85",
month = jul # "--" # sep,
year = "1993",
CODEN = "SPCOEL",
ISSN = "0168-7875",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Convex Computer, Utrecht, Netherlands",
classification = "C4240P (Parallel programming and algorithm theory);
C5440 (Multiprocessor systems and techniques); C5470
(Performance evaluation and testing)",
corpsource = "Convex Computer, Utrecht, Netherlands",
fjournal = "Supercomputer",
keywords = "Convex Meta Series; EuroBen benchmark; evaluation;
Generalized Red Black algorithm; module MOD3H; Module
MOD3H; multiprocessing systems; parallel algorithms;
performance; performance measurements; Performance
measurements; performance numbers; Performance numbers;
Poisson; Poisson solver; PVM implementation; solver",
pubcountry = "Netherlands",
thesaurus = "Multiprocessing systems; Parallel algorithms;
Performance evaluation",
treatment = "P Practical",
}
@PhdThesis{Wilkinson:1993:IFT,
author = "Timothy James Wilkinson",
title = "Implementing Fault Tolerance in a 64-bit Distributed
Operating System",
school = "Systems Architecture Research Centre, City
University",
address = "London, UK",
month = jul,
year = "1993",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
Misc/sasos.bib",
abstract = "This thesis explores the potential of 64-bit
processors for providing a different style of
distributed operating system. Rather than providing
another reworking of the UNIX model, the use of the
large address space for unifying volatile memory
(virtual memory), persistent memory (file systems) and
distributed network access is examined and a novel
operating system, Arius, is proposed. The concepts
behind the design of Arius are briefly reviewed, and
then the reliability of such a system is examined in
detail. The unified nature of the architecture makes it
possible to use a reliable single address space to
provide a completely reliable system without the
addition of other mechanisms. Protocols are proposed to
provide locally scalable distributed shared memory and
these are then augmented to handle machine failures
transparently through the use of distributed checkpoints
and rollback. The checkpointing system makes use of the
caching mechanism in DSM to provide data duplication
for failure recovery. By using distributed memory for
checkpoints, recovery from machine faults may be
handled seamlessly. To cope with more ``complete''
failures, persistent storage is also included in the
failure mechanism. These protocols are modelled to show
their operability and to determine the cost they incur
in various types of parallel and serial programs.
Results are presented to demonstrate these costs.",
}
@InProceedings{Young:1993:PEN,
author = "Y.-H. Young and K. Sikorski",
title = "Performance evaluation of network programming
environments",
crossref = "Mudge:1993:PTS",
pages = "106--107 (vol. 2)",
year = "1993",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Utah Univ., Salt Lake City, UT,
USA",
classification = "C5620 (Computer networks and techniques); C5670
(Network performance); C6115 (Programming support);
C6150G (Diagnostic, testing, debugging and evaluating
systems)",
keywords = "Benchmark tests; EXPRESS; ISIS; Jacobi iterative
algorithms; Library support; LINDA; Message passing;
Monte Carlo simulation; Network programming
environments; Performance evaluation; PVM; Scalability;
TCGMSG; TCP/IP network protocol; Token ring network;
UDP/IP network protocol",
thesaurus = "Computer networks; Message passing; Monte Carlo
methods; Performance evaluation; Programming
environments; Protocols",
}
@InProceedings{Zollweg:1993:OP,
author = "J. A. Zollweg",
title = "Overview of {PVM}",
crossref = "Anonymous:1993:PSE",
pages = "981--986",
year = "1993",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Cornell Nat. Supercomput. Facility, NY, USA",
classification = "C5640 (Protocols); C6110P (Parallel programming);
C6150N (Distributed systems software); C6150N
(Distributed systems); C7320 (Physics and chemistry
computing); C7320 (Physics and Chemistry)",
corpsource = "Cornell Nat. Supercomput. Facility, NY, USA",
keywords = "frequent communication; Frequent communication; high;
High performance switch; message passing;
message-passing environment; Message-passing
environment; parallel programming; Parallel Virtual
Machine; performance switch; physics computing;
protocols; PVM package; scalable POWERparallel system;
Scalable POWERparallel system; scientific application;
Scientific application; software packages; TCP/IP
communication; virtual machines; workstations;
Workstations",
pubcountry = "Switzerland",
thesaurus = "Message passing; Parallel programming; Physics
computing; Protocols; Software packages; Virtual
machines",
treatment = "G General Review; P Practical",
}
@InProceedings{Altas:1994:NIE,
author = "I. Altas and M. Rezny and J. Louis and K. Burrage and
R. Moore and J. Belward",
title = "A new image enhancement algorithm on {MasPar} and
{Parallel Virtual Machine} ({PVM}) environments",
crossref = "Dekker:1994:MPP",
pages = "819--826",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Centre for Image Anal., Charles Sturt Univ., Wagga
Wagga, NSW, Australia",
classification = "C4170 (Differential equations); C4240P (Parallel
programming and algorithm theory); C5260B (Computer
vision and image processing techniques); C6110P
(Parallel programming)",
corpsource = "Centre for Image Anal., Charles Sturt Univ., Wagga
Wagga, NSW, Australia",
keywords = "computation environment; equations; fine grain; Fine
grain computation environment; image enhancement; image
enhancement algorithm; Image enhancement algorithm;
Machine; MasPar; minimisation; optimal image
enhancement; Optimal image enhancement; parallel
algorithms; Parallel Virtual; Parallel Virtual Machine;
partial differential; partial differential equations;
Partial differential equations; processing time;
Processing time; variational; Variational
minimisation",
pubcountry = "Netherlands",
sponsororg = "AKZO NOBEL; BSO; Convex Comput.; HPCN projects; IBM;
NOWESP; et al",
thesaurus = "Image enhancement; Parallel algorithms; Partial
differential equations",
treatment = "T Theoretical or Mathematical",
}
@InProceedings{Alund:1994:CFD,
author = "A. {\AA}lund and P. L{\"o}tstedt and R. Ryd{\'e}n",
title = "Computational fluid dynamics on workstation clusters
in industrial environments",
crossref = "Dongarra:1994:PSC",
pages = "1--10",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Swedish Inst. of Appl. Math., Goteborg, Sweden",
classification = "C4100 (Numerical analysis); C5620L (Local area
networks); C6110P (Parallel programming); C6150N
(Distributed systems software); C7460 (Aerospace
engineering computing)",
keywords = "3D Navier--Stokes code; ABB Corporate Research;
Combustion chambers; Compressible flow; Computational
fluid dynamics; CRAY Y-MP processor; Ethernet;
Industrial environments; Industrial production codes;
Multigrid method; Numerical simulations;
Parallelisation; PVM message passing system; SAAB
Military Aircraft; SGI R4000 workstations; Stationary
Euler equations; Stationary Navier--Stokes equations;
Swedish Institute of Applied Mathematics; Turbulent
flow; Volvo Flygmotor; Workstation clusters",
thesaurus = "Aerospace computing; Chemically reactive flow;
Combustion; Compressible flow; Engineering
workstations; Flow simulation; Local area networks;
Navier--Stokes equations; Numerical analysis; Parallel
programming; Turbulence",
}
@InProceedings{Amato:1994:PEP,
author = "M. Amato and A. Matrone and P. Schiano",
title = "A practical experience in parallelizing a large {CFD}
code: the {ENSOLV} flow solver",
crossref = "Gentzsch:1994:HPC",
pages = "508--513",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Centro Italiano Ricerche Aerospaziali, Capua, Italy",
classification = "A4710 (General fluid dynamics theory, simulation and
other computational methods); C5440 (Multiprocessing
systems); C6110P (Parallel programming); C6140D (High
level languages); C6150N (Distributed systems
software); C7320 (Physics and chemistry computing)",
keywords = "30000-Fortran-statements code; 3D Thin Layer
Navier--Stokes code; Complex aerodynamic configuration;
Computational fluid dynamics; Data transmission; ENSOLV
flow solver; Large CFD code; Message passing; MIMD
machines; Multidisciplinary group; Parallel
architectures; Parallel implementation; Practical
experience; PVM; Subsonic/transonic flow",
thesaurus = "Aerodynamics; FORTRAN; Message passing; Navier--Stokes
equations; Parallel machines; Parallel programming;
Physics computing",
}
@InProceedings{Andersen:1994:PIA,
author = "B. S. Andersen and P. Kaae and C. Keable and W.
Owczarz and J. Wa{\'s}niewski and Z. Zlatev",
title = "{PVM} Implementations of Advection-Chemistry Modules
of Air Pollution Models",
crossref = "Dongarra:1994:PSC",
pages = "11--16",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Tech. Univ. Denmark, Lyngby, Denmark",
classification = "C6110P (Parallel programming); C6150G (Diagnostic,
testing, debugging and evaluating systems); C6150N
(Distributed systems software); C7320 (Physics and
chemistry computing); C7340 (Geophysics computing)",
corpsource = "Tech. Univ. Denmark, Lyngby, Denmark",
keywords = "advection-chemistry modules; Advection-chemistry
modules; air pollution; air pollution models; Air
pollution models; Air pollution reduction; atmospheric
chemistry; chemical transformations; Chemical
transformations; chemistry computing; environmental
science computing; evaluation; geophysical fluid
dynamics; implementations; mathematical models;
Mathematical models; module testing; Module testing;
parallel programming; performance; Performance; program
testing; PVM; PVM implementations; PVM program;
reduction; software performance; transport; Transport;
virtual machines; wind; Wind",
pubcountry = "Germany",
sponsororg = "Danish Comput. Centre for Res. and Educ.; Inst. Math.
Modelling; Tech. Univ. Denmark",
thesaurus = "Air pollution; Atmospheric chemistry; Chemistry
computing; Environmental science computing; Geophysical
fluid dynamics; Parallel programming; Program testing;
Software performance evaluation; Virtual machines;
Wind",
treatment = "P Practical",
}
@InProceedings{Anonymous:1994:ALM,
author = "Anonymous",
title = "Adaptive Load Migration Systems for {PVM}",
crossref = "IEEE:1994:PSW",
pages = "390--399",
year = "1994",
bibdate = "Mon Aug 26 10:38:41 MDT 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Anonymous:1994:MMP,
  author =       "Anonymous",
  title =        "{MPI}: a message-passing interface standard",
  journal =      j-IJSAHPC,
  volume =       "8",
  number =       "3/4",
  pages =        "159--416",
  month =        "Fall--Winter",
  year =         "1994",
  CODEN =        "IJSAE9",
  ISSN =         "0890-2720",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Message passing is a paradigm used widely on certain
                 classes of parallel machines, especially those with
                 distributed memory. Although there are many variations,
                 the basic concept of processes communicating through
                 messages is well understood. Over the last ten years,
                 substantial progress has been made in casting
                 significant applications in this paradigm. Each vendor
                 has implemented its own variant. More recently, several
                 systems have demonstrated that a message-passing system
                 can be efficiently and portably implemented. A
                 definition of both the syntax and semantics of a core
                 of library routines is thus presented. It will be
                 useful to a wide range of users and efficiently
                 implementable on a wide range of computers.",
  acknowledgement = ack-nhfb,
  classification = "722.2; 722.3; 722.4; 723.1; 723.1.1; C5440
                 (Multiprocessing systems); C6110P (Parallel
                 programming); C6140D (High level languages); C6150N
                 (Distributed systems software)",
  fjournal =     "International Journal of Supercomputer Applications
                 and High Performance Computing",
  keywords =     "C; C (programming language); C language; Codes
                 (standards); Collective communication; collective
                 communication; Computational linguistics; Computer
                 software; Conventions; conventions; Data communication
                 systems; FORTRAN; FORTRAN (programming language);
                 Fortran 77; Fortran bindings; Interfaces (computer);
                 Language binding; language binding; message passing;
                 Message passing interface; Message Passing Interface;
                 Message-passing interface standard; message-passing
                 interface standard; MPI environmental management; MPI
                 function; MPI function index; MPI terms; Name-shifting
                 convention; name-shifting convention; Parallel
                 machines; Parallel processing systems; parallel
                 programming; Point to point communication;
                 Point-to-point communication; point-to-point
                 communication; Process group collective communication
                 operations; process group collective communication
                 operations; Process topologies; process topologies;
                 Profiling interface; profiling interface; Programmer;
                 programmer; Standardization; standards; Topological
                 structures; topological structures; Unique
                 communication contexts; unique communication contexts;
                 Utility functions; utility functions",
  thesaurus =    "C language; FORTRAN; Message passing; Parallel
                 programming; Standards",
  treatment =    "P Practical",
}
@InProceedings{Antonuccio-Delogu:1994:PTN,
  author =       "V. Antonuccio-Delogu and U. Becciani",
  title =        "A parallel tree {N-body} code for heterogeneous
                 clusters",
  crossref =     "Dongarra:1994:PSC",
  pages =        "17--32",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Catania Astrophys. Obs., Italy",
  classification = "C1160 (Combinatorial mathematics); C4240P (Parallel
                 programming and algorithm theory); C5620L (Local area
                 networks); C6110B (Software engineering techniques);
                 C6110P (Parallel programming); C6150N (Distributed
                 systems software); C7320 (Physics and chemistry
                 computing); C7350 (Astronomy and astrophysics
                 computing)",
  keywords =     "Catania Astrophysical Observatory; Ethernet; F77
                 version; Generic situations; Heterogeneous clusters;
                 Heterogeneous workstation collection; Hypercube
                 communication pattern; Incomplete hypercube;
                 Mini-supercomputer; Orthogonal recursive bisection
                 oct-tree scheme; Parallel Barnes--Hut 3D N-body tree
                 algorithm; Parallel tree N-body code; Parallelization
                 scheme; Processing units; PVM 3.2.5; Software
                 environment; SPMD paradigm",
  thesaurus =    "Astronomy computing; Hypercube networks; Local area
                 networks; Octrees; Parallel algorithms; Parallel
                 programming; Physics computing; Software performance
                 evaluation; Software portability; Virtual machines;
                 Workstations",
}
@Article{Averbuch:1994:PES,
  author =       "A. Averbuch and E. Gabber and S. Itzikowitz and B.
                 Shoham",
  title =        "On the parallel elliptic single\slash multigrid
                 solutions about aligned and nonaligned bodies using the
                 {Virtual Machine for Multiprocessors}",
  journal =      j-SCI-PROG,
  volume =       "3",
  number =       "1",
  pages =        "13--32",
  month =        "Spring",
  year =         "1994",
  CODEN =        "SCIPEV",
  ISSN =         "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L =       "1058-9244",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4170 (Differential equations); C5440
                 (Multiprocessing systems); C6110B (Software engineering
                 techniques); C6110P (Parallel programming); C6150N
                 (Distributed systems software)",
  corpsource =   "Sch. of Math. Sci., Tel Aviv Univ., Israel",
  fjournal =     "Scientific Programming",
  journal-URL =  "http://iospress.metapress.com/content/1058-9244",
  keywords =     "algorithm; aligned bodies; alignment overhead;
                 architectures; coherent services; distributed memory
                 multiprocessor; efficient programming; elliptic
                 equations; explicitly parallel application programs;
                 for Multi-Processors software package; grid points;
                 memory multiprocessors; memory systems; message
                 passing; MIMD; MOS; multi-user shared memory
                 multiprocessors; multiprocessors; nonaligned bodies;
                 parallel architectures; parallel elliptic; parallel
                 elliptic multigrid solutions; parallel program writing;
                 parallel programming; partial differential equations;
                 performance; portable programming; Sequent Symmetry;
                 shared; single grid solution; single-user shared;
                 software packages; software portability; transputer
                 network; transputer systems; Virtual Machine",
  treatment =    "P Practical",
}
@InProceedings{Aversa:1994:PSH,
  author =       "R. Aversa and N. Mazzocca and U. Villano",
  title =        "{PS}: a simulator for heterogeneous computing
                 environments",
  crossref =     "Dekker:1994:MPP",
  pages =        "335--343",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dipartimento di Inf. e Sistemistica, Naples Univ.,
                 Italy",
  classification = "C6150N (Distributed systems software)",
  keywords =     "Distributed applications; Heterogeneous computing
                 environment simulator; Performance analysis;
                 Performance index accuracy; PS; PVM run-time system;
                 PVM Simulator; Simulation environment; Simulator
                 architecture",
  thesaurus =    "Parallel processing; Software performance evaluation",
}
@InProceedings{Bachem:1994:PCT,
  author =       "A. Bachem and W. Hochst{\"a}ttler and M. Malich",
  title =        "Simulated Trading --- a New Parallel Approach For
                 Solving Vehicle Routing Problems",
  crossref =     "Joubert:1994:PCT",
  pages =        "471--475",
  year =         "1994",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 Techreports/ZPR.Koeln.bib",
  annote =       "We present a parallel improvement heuristic for
                 solving vehicle routing problems with additional
                 constraints. The algorithm was implemented on a
                 parallel transputer machine and on a cluster of
                 workstations using PVM. The computational results
                 obtained with sequential and parallel Simulated Trading
                 show that our approach is superior compared to all
                 heuristics known to the authors by now.",
  crindex =      "29k,6,zpr92-125.ps.gz",
}
@Article{Bala:1994:IEU,
  author =       "V. Bala and J. Bruck and R. Bryant and R. Cypher and
                 P. {De Jong}",
  title =        "The {IBM} external user interface for scalable
                 parallel systems",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "20",
  number =       "4",
  pages =        "445--??",
  month =        apr,
  year =         "1994",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Wed Aug 14 09:02:28 MDT 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@InProceedings{Ballico:1994:PSP,
  author =       "M. Ballico and H. Lederer",
  title =        "{Plasmafusionsforschung: Serielles und paralleles
                 Rechnen mit nur einem Programmcode auf Cray YMP,
                 nCUBE2, Workstations mit PVM und KSR1}",
  crossref =     "Anonymous:1994:FWR",
  pages =        "232--234",
  year =         "1994",
  bibdate =      "Thu Feb 29 17:59:11 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@InProceedings{Baltas:1994:CPC,
  author =       "N. D. Baltas and C. S. van den Berghe",
  title =        "Comparison of the porting of a computational fluid
                 dynamics application to {SIMD} and {MIMD} computers",
  crossref =     "Dekker:1994:MPP",
  pages =        "761--767",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "CHAM, London, UK",
  classification = "C6110B (Software engineering techniques); C6150N
                 (Distributed systems software); C7320 (Physics and
                 chemistry computing)",
  keywords =     "Computational fluid dynamics; DAP; ESPRIT III project;
                 Fortran-Plus; Maintainability; Massively parallel
                 architectures; Massively parallel computers;
                 Message-passing libraries; MIMD; MIMD Parsytec;
                 Parallel Software-Hardware Application; PARIX; PARMACS;
                 Parsytec model; PASHA; PHOENICS; Porting; Programming
                 models; PVM; Scalable code; SIMD; SIMD DAP",
  thesaurus =    "Message passing; Parallel architectures; Parallel
                 programming; Physics computing; Research initiatives;
                 Software maintenance; Software portability",
}
@InProceedings{Beguelin:1994:CMS,
  author =       "A. Beguelin and B. Bruegge",
  title =        "A configurable monitoring system for parallel
                 programming",
  crossref =     "IEEE:1994:PSI",
  pages =        "206",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:18:08 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Sch. of Comput. Sci., Carnegie Mellon Univ.,
                 Pittsburgh, PA, USA",
  classification = "C0310F (Software development management); C5440
                 (Multiprocessor systems and techniques); C6110B
                 (Software engineering techniques); C6110P (Parallel
                 programming); C6150G (Diagnostic, testing, debugging
                 and evaluating systems)",
  keywords =     "BEE++; Configurable distributed monitoring system;
                 Distributed programs; Heterogeneous systems; Message
                 passing system; Parallel program debugging; Parallel
                 programming; Parallel virtual machine; PVM",
  thesaurus =    "Configuration management; Message passing; Parallel
                 programming; Program debugging; System monitoring",
}
@Article{Beguelin:1994:HHN,
  author =       "A. Beguelin and J. J. Dongarra and G. Al Geist and R.
                 Manchek and K. Moore",
  title =        "{HeNCE}: a heterogeneous network computing
                 environment",
  journal =      j-SCI-PROG,
  volume =       "3",
  number =       "1",
  pages =        "49--60",
  month =        "Spring",
  year =         "1994",
  CODEN =        "SCIPEV",
  ISSN =         "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L =       "1058-9244",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Sch. of Comput. Sci., Carnegie Mellon Univ.,
                 Pittsburgh, PA, USA",
  classification = "C1160 (Combinatorial mathematics); C5620L (Local
                 area networks); C6110P (Parallel programming); C6115
                 (Programming support); C6150C (Compilers, interpreters
                 and other processors); C6150G (Diagnostic, testing,
                 debugging and evaluating systems); C6150N (Distributed
                 systems software)",
  fjournal =     "Scientific Programming",
  journal-URL =  "http://iospress.metapress.com/content/1058-9244",
  keywords =     "Aggregate resources; Compilation; Data dependencies;
                 Data formats; Debugging; Directed graphs; Execution;
                 Graph nodes; Graphical language; HeNCE; Heterogeneous
                 network computing environment; Integrated graphical
                 environment; Local area network; Network computation;
                 Networked computers; Operating systems; Parallel
                 programs; Parallel virtual machine; Parallelism;
                 Supercomputer performance; Tracing; Writing",
  thesaurus =    "Directed graphs; Local area networks; Parallel
                 programming; Parallelising compilers; Program
                 debugging; Virtual machines",
}
@InProceedings{Beletsky:1994:OPV,
  author =       "V. Beletsky and T. Popova and A. Chemeris",
  title =        "Organization of a parallel virtual machine",
  crossref =     "Horiguchi:1994:ISP",
  pages =        "421--426",
  year =         "1994",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440
                 (Multiprocessing systems); C6150N (Distributed systems
                 software); C7430 (Computer engineering)",
  corpsource =   "Lab. of Parallel Comput., Acad. of Sci., Kiev,
                 Ukraine",
  keywords =     "compilers; dependence graph building; loop
                 parallelization; parallel architectures; parallel
                 machines; parallel virtual machine organization;
                 processor; program compilers; scheduling; scheduling
                 job programs; simulating programs; simulation; virtual
                 machines",
  sponsororg =   "Japan Advanced Inst. Sci. and Technol.; IEEE Comput.
                 Soc.; IEEE Comput. Soc. Tech. Committee on Comput.
                 Archit.; IEEE Comput. Soc. Tech. Committee on Parallel
                 Process.; IPSJ Tech. Committee on Algorithms; IPSJ
                 Tech. Committee on Comput. Archit.; IEICE Tech.
                 Committee on Comput. Syst",
  treatment =    "P Practical",
}
@MastersThesis{Biradar:1994:ADL,
  author =       "Umesh V. Biradar",
  title =        "Adaptive distributed load balancing model for parallel
                 virtual machine",
  type =         "Master of Science in Computer Science",
  school =       "Department of Computer Science, College of
                 Engineering, Lamar University",
  address =      "Beaumont, TX, USA",
  pages =        "viii + 44",
  year =         "1994",
  bibdate =      "Mon Jan 15 18:16:39 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  keywords =     "Distributed operating systems (Computers); Electronic
                 data processing --- Distributed processing;
                 Multiprocessors; Parallel processing (Electronic
                 computers)",
}
%%% The corporate co-author is wrapped in an extra brace level so that
%%% BibTeX treats it as one indivisible name instead of splitting it
%%% into given-name / von / surname parts.
@TechReport{Bischof:1994:CSM,
  author =       "Christian Bischof and {Institute for Defense Analyses}",
  title =        "A Case study of {MPI}: portable and efficient
                 libraries",
  type =         "Technical report",
  number =       "SRC-TR-94-130",
  institution =  "Supercomputing Research Center: IDA",
  address =      "Lanham, MD, USA",
  pages =        "6",
  year =         "1994",
  bibdate =      "Sat Feb 24 09:43:12 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "``In this paper, we discuss the performance achieved
                 by several implementations of the recently defined
                 Message Passing Interface (MPI) standard. In
                 particular, performance results for different
                 implementations of the broadcast operation are analyzed
                 and compared on the Delta, Paragon, SP1 and CM5.''
                 Supported in part by the Applied and Computational
                 Mathematics Program, Advanced Research Projects Agency.
                 Supported in part by the Office of Scientific
                 Computing, U.S. Department of Energy.",
  acknowledgement = ack-nhfb,
  annote =       "This paper will appear in the proceedings of the
                 Seventh SIAM conference on Parallel Processing for
                 Scientific Computing, September 15, 1994.",
  keywords =     "Parallel processing (Electronic computers)",
}
%%% NOTE: Same title and pages as the journal record Borger:1994:FSP.
%%% Author names use accent escapes so that both records spell them
%%% identically; the citation key is unchanged.
@InProceedings{Boerger:1994:FSP,
  author =       "E. B{\"o}rger and U. Gl{\"a}sser",
  title =        "A Formal Specification of the {PVM} Architecture",
  crossref =     "Pehrson:1994:IPP",
  pages =        "402--409",
  year =         "1994",
  bibdate =      "Thu Feb 29 17:59:11 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@InProceedings{Borger:1994:AMP,
  author =       "E. B{\"o}rger and U. Gl{\"a}sser",
  title =        "An abstract model of the {Parallel Virtual Machine}
                 ({PVM})",
  crossref =     "Anonymous:1994:PDC",
  pages =        "308--309",
  year =         "1994",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6150N (Distributed
                 systems software)",
  corpsource =   "Dipartimento di Inf., Pisa Univ., Italy",
  keywords =     "(computers); abstract model; abstraction level;
                 algebraic specification; architecture; distributed
                 memory computer; distributed memory systems; formal;
                 heterogeneous distributed computing; message passing;
                 operating systems; Parallel Virtual Machine; PVM;
                 specification; user view; virtual machines",
  sponsororg =   "Int. Soc. Comput. and Their Appl.-ISCA; IEEE; Nat.
                 Supercomput. Centre for Energy and Environ.; Northern
                 Telecom; CRAY Res",
  treatment =    "P Practical",
}
@Article{Borger:1994:FSP,
  author =       "E. B{\"o}rger and U. Gl{\"a}sser",
  title =        "A formal specification of the {PVM} architecture",
  journal =      j-IFIP-TRANS-A,
  volume =       "A-51",
  pages =        "402--409",
  month =        "????",
  year =         "1994",
  CODEN =        "ITATEC",
  ISSN =         "0926-5473",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dipartimento di Inf., Pisa Univ., Italy",
  classification = "C4240P (Parallel programming and algorithm theory);
                 C5440 (Multiprocessing systems); C5440 (Multiprocessor
                 systems and techniques); C6110B (Software engineering
                 techniques); C6150N (Distributed systems software);
                 C6150N (Distributed systems)",
  conflocation = "Hamburg, Germany; 28 Aug.-2 Sept. 1994",
  conftitle =    "Technology and Foundations Information Processing '94.
                 IFIP 13th World Computer Congress",
  corpsource =   "Dipartimento di Inf., Pisa Univ., Italy",
  fjournal =     "IFIP Transactions. A. Computer Science and
                 Technology",
  keywords =     "concurrent evolving algebras; Concurrent evolving
                 algebras; data structures; Data structures; distributed
                 computing; distributed processing; formal
                 specification; Formal specification; heterogeneous;
                 Heterogeneous distributed computing; Machine; message;
                 message passing; Message passing; parallel machines;
                 Parallel Virtual; Parallel Virtual Machine; passing;
                 PVM architecture; virtual machines",
  pubcountry =   "Netherlands",
  thesaurus =    "Distributed processing; Formal specification; Message
                 passing; Parallel machines; Virtual machines",
  treatment =    "P Practical",
}
%%% INSPEC class A0550 is named ``Lattice theory and statistics; Ising
%%% problems''; the semicolon inside its parentheses is part of the
%%% class name, not a separator between classification items.
@InProceedings{Boryczko:1994:LGA,
  author =       "K. Boryczko and M. Bubak and J. Kitowski and J.
                 Moscinski and R. Slota",
  title =        "Lattice gas automata and molecular dynamics on a
                 network of computers",
  crossref =     "Gentzsch:1994:HPC",
  pages =        "177--180",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Inst. Inf., Krakow, Poland",
  classification = "A0340G (Fluid dynamics: general mathematical
                 aspects); A0550 (Lattice theory and statistics; Ising
                 problems); A4710 (General fluid dynamics theory,
                 simulation and other computational methods); C4240P
                 (Parallel programming and algorithm theory); C5620L
                 (Local area networks); C6110P (Parallel programming);
                 C7320 (Physics and chemistry computing)",
  keywords =     "Computer network; CONVEX C3210; Express; Fluid flow
                 simulation; Lattice gas automata; Molecular dynamics;
                 Network Linda; P4; Parallel algorithms; Parallel
                 programs; PVM; Workstation network",
  thesaurus =    "Automata theory; Digital simulation; Flow simulation;
                 Hydrodynamics; Lattice gas; Local area networks;
                 Parallel algorithms; Parallel programming; Physics
                 computing; Workstations",
}
@InProceedings{Briley:1994:NNH,
  author =       "W. R. Briley and D. S. Reese and A. Skjellum and L. H.
                 Turcotte",
  title =        "{NHPDCC}: The {National High Performance Distributed
                 Computing Consortium}",
  crossref =     "IEEE:1994:PSP",
  pages =        "2--9",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:18:08 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "NSF Eng. Res. Center for Comput. Field Simulation,
                 Mississippi State Univ., MS, USA",
  classification = "C0200 (General computer topics); C6150N (Distributed
                 systems software)",
  keywords =     "Benchmarks; Consortium; High performance computing;
                 MPI message-passing; Multi-vendor; National High
                 Performance Distributed Computing Consortium; NHPDCC;
                 Scalable parallel libraries; Software projects",
  thesaurus =    "Distributed processing; Societies",
}
@InProceedings{Bubak:1994:EMD,
  author =       "M. Bubak and J. Moscinski and M. Pogoda and W.
                 Zdechlikiewicz",
  title =        "Efficient molecular dynamics simulation on networked
                 workstations",
  crossref =     "Gruber:1994:PJE",
  pages =        "191--194",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Inst. of Comput. Sci, Cracow, Poland",
  classification = "A0260 (Numerical approximation and analysis); A6120J
                 (Computer simulation of static and dynamic liquid
                 behaviour); C4240C (Computational complexity); C4240P
                 (Parallel programming and algorithm theory); C6110P
                 (Parallel programming); C6150N (Distributed systems
                 software); C6185 (Simulation techniques); C7320
                 (Physics and chemistry computing)",
  keywords =     "Computational complexity; Efficient molecular dynamics
                 simulation; Large particle numbers; Link lists;
                 Neighbor lists; Networked workstations; Parallel 2D
                 molecular dynamics program; Parallel MD algorithm; PVM
                 programming environment; Sequential 2D molecular
                 dynamics program",
  thesaurus =    "Computational complexity; Digital simulation; List
                 processing; Local area networks; Molecular dynamics
                 method; Parallel algorithms; Parallel programming;
                 Physics computing; Workstations",
}
%%% INSPEC class A0550 is named ``Lattice theory and statistics; Ising
%%% problems''; the semicolon inside its parentheses is part of the
%%% class name, not a separator between classification items.
@InProceedings{Bubak:1994:FLG,
  author =       "M. Bubak and J. Moscinski and R. Slota",
  title =        "{FHP} lattice gas on networked workstations",
  crossref =     "Gruber:1994:PJE",
  pages =        "427--430",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Inst. of Control Sci., Cracow, Poland",
  classification = "A0550 (Lattice theory and statistics; Ising
                 problems); A4710 (General fluid dynamics theory,
                 simulation and other computational methods); C4240P
                 (Parallel programming and algorithm theory); C6110P
                 (Parallel programming); C6150N (Distributed systems
                 software); C6185 (Simulation techniques); C7320
                 (Physics and chemistry computing)",
  keywords =     "Computer power; Domain decomposition; Dynamic load
                 balancing; FHP lattice gas; Fluid flows; Lattice gas
                 simulation; Networked workstations; Normal load;
                 Parallel algorithm; Parallel distributed program; PVM",
  thesaurus =    "Digital simulation; Flow simulation; Fluid dynamics;
                 Lattice gas; Local area networks; Operating systems
                 [computers]; Parallel algorithms; Parallel programming;
                 Physics computing; Resource allocation; Workstations",
}
%%% INSPEC class A0550 is named ``Lattice theory and statistics; Ising
%%% problems''; the semicolon inside its parentheses is part of the
%%% class name, not a separator between classification items.
@InProceedings{Bubak:1994:IPL,
  author =       "M. Bubak and J. Moscinski and R. Slota",
  title =        "Implementation of Parallel Lattice Gas Program on
                 Workstations under {PVM}",
  crossref =     "Dongarra:1994:PSC",
  pages =        "136--146",
  year =         "1994",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Inst. of Comput. Sci, Akademia Gorniczo-Hutnicza,
                 Cracow, Poland",
  classification = "A0270 (Computational techniques); A0545 (Theory and
                 models of chaotic systems); A0550 (Lattice theory and
                 statistics; Ising problems); A4710 (General fluid
                 dynamics theory, simulation and other computational
                 methods); C4220 (Automata theory); C4240P (Parallel
                 programming and algorithm theory); C6110P (Parallel
                 programming); C7320 (Physics and chemistry computing)",
  conflocation = "Lyngby, Denmark; 20-23 June 1994",
  conftitle =    "Proceedings of Workshop on Parallel Scientific
                 Computing",
  corpsource =   "Inst. of Comput. Sci, Akademia Gorniczo-Hutnicza,
                 Cracow, Poland",
  keywords =     "algorithms; balancing; cellular automata; domain
                 decomposition; Domain decomposition; dynamic load;
                 Dynamic load balancing; flow simulation; fluid flow
                 simulation; Fluid flow simulation; lattice gas; lattice
                 gas automata program; Lattice gas automata program;
                 parallel; parallel lattice gas program; Parallel
                 lattice gas program; physics computing",
  pubcountry =   "Germany",
  sponsororg =   "Danish Comput. Centre for Res. and Educ.; Inst. Math.
                 Modelling; Tech. Univ. Denmark",
  thesaurus =    "Cellular automata; Flow simulation; Lattice gas;
                 Parallel algorithms; Physics computing",
  treatment =    "T Theoretical or Mathematical",
}
@InProceedings{Bubak:1994:PDS,
  author =       "M. Bubak and J. Moscinski and M. Pogoda and W.
                 Zdechlikiewicz",
  title =        "Parallel distributed {2-D} short-range molecular
                 dynamics on networked workstations",
  crossref =     "Dongarra:1994:PSC",
  pages =        "127--135",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Inst. of Comput. Sci, Akademia Gorniczo-Hutnicza,
                 Cracow, Poland",
  classification = "A0260 (Numerical approximation and analysis); A0270
                 (Computational techniques); A6120J (Computer simulation
                 of static and dynamic liquid behaviour); C4240C
                 (Computational complexity); C4240P (Parallel
                 programming and algorithm theory); C6110P (Parallel
                 programming); C7320 (Physics and chemistry computing)",
  keywords =     "Computational complexity; Execution time;
                 Lennard-Jones systems; Link lists; Memory requirements;
                 Neighbour lists; Networked workstations; Parallel
                 algorithm; Parallel distributed 2-D short-range
                 molecular dynamics; PVM programming environment",
  thesaurus =    "Computational complexity; Lennard-Jones potential;
                 Molecular dynamics method; Parallel algorithms; Physics
                 computing",
}
@InProceedings{Burrer:1994:RRB,
  author =       "C. Burrer and P. Remy",
  title =        "{RUBIS}: a runtime basic interface software on {TELMAT
                 T9000 TN} series",
  crossref =     "deGloria:1994:TAS",
  pages =        "63--78",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "TELMAT MULTINODE, Soultz, France",
  classification = "C6110P (Parallel programming); C6150C (Compilers,
                 interpreters and other processors); C6150G (Diagnostic,
                 testing, debugging and evaluating systems); C6150J
                 (Operating systems); C6150N (Distributed systems
                 software)",
  keywords =     "Code portability; Compiling; Configuration; Functional
                 flexibility; INMOS TOOLSET environment; Local resources
                 management; Micro-kernel; MPI prototype; Object
                 abstractions; Parallel programs; Parallel transputer
                 machines; PVM subsystem; RUBIS; Runtime basic interface
                 software; T.Paragraph post-mortem tool; TELMAT T9000 TN
                 series",
  thesaurus =    "Operating system kernels; Parallel programming;
                 Parallelising compilers; Program diagnostics; Software
                 performance evaluation; Software portability;
                 Transputer systems",
}
@InProceedings{Campanai:1994:EAS,
  author =       "M. Campanai and O. Morales and S. Viti and R. Trotta
                 and P. Viliani and M. {Lo Moro}",
  title =        "Experiences assessing software testing activities: the
                 adoption of {PVM}, a prediction and validation model",
  crossref =     "Anonymous:1994:SQC",
  pages =        "491--500",
  year =         "1994",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C0310F (Software development management); C6110S
                 (Software metrics); C6150G (Diagnostic, testing,
                 debugging and evaluating systems)",
  corpsource =   "CESVIT/CQ ware, Florence, Italy",
  keywords =     "activity optimization; improvement path; management;
                 managers; prediction and; program testing; program
                 verification; project leaders; PVM; software; software
                 development; software metrics; software system;
                 software testing; software testing activity assessment;
                 spatial applications; system monitoring; technicians;
                 telecommunications; validation model",
  pubcountry =   "Switzerland",
  treatment =    "P Practical",
}
@InProceedings{Casas:1994:ALM,
  author =       "J. Casas and R. Konuru and S. W. Otto and R. Prouty
                 and J. Walpole",
  title =        "Adaptive load migration systems for {PVM}",
  crossref =     "IEEE:1994:PSW",
  pages =        "390--399",
  year =         "1994",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://sc94.ameslab.gov/AP/contents.html",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst.
                 of Sci. and Technol., Portland, OR, USA",
  classification = "C6150N (Distributed systems software)",
  conflocation = "Washington, DC, USA; 14-18 Nov. 1994",
  conftitle =    "Proceedings of Supercomputing '94",
  corpsource =   "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst.
                 of Sci. and Technol., Portland, OR, USA",
  keywords =     "adaptive data movement; Adaptive data movement;
                 adaptive load distribution; Adaptive load distribution;
                 adaptive load migration systems; Adaptive load
                 migration systems; ADM; allocation; effectiveness;
                 Effectiveness; heterogeneous workstation network;
                 message passing; message passing system; Message
                 passing system; migratable PVM; Migratable PVM; MPVM;
                 parallel algorithms; parallel applications; Parallel
                 applications; performance; Performance; programming;
                 programming methodology; Programming methodology;
                 resource; shared; Shared heterogeneous workstation
                 network; transparent migration; Transparent migration;
                 Unix; Unix process; UPVM; usability; Usability;
                 user-level PVM; User-level PVM; virtual machines;
                 virtual processors; Virtual processors; workstation
                 environment changes; Workstation environment changes",
  sponsororg =   "IEEE Comput. Soc.; ACM; SIAM",
  thesaurus =    "Message passing; Parallel algorithms; Programming;
                 Resource allocation; Unix; Virtual machines",
  treatment =    "P Practical",
}
@InProceedings{Castagnera:1994:NEP,
  author =       "K. Castagnera and D. Cheng and R. Fatoohi and E. Hook
                 and B. Kramer and C. Manning and J. Musch and C.
                 Niggley and W. Saphir and D. Sheppard and M. Smith and
                 I. Stockdale and S. Welch and R. Williams and D. Yip",
  title =        "{NAS} experiences with a prototype cluster of
                 workstations",
  crossref =     "IEEE:1994:PSW",
  pages =        "410--419",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "NAS Syst. Div., NASA Ames Res. Center, Moffett Field,
                 CA, USA",
  classification = "C5430 (Microcomputers); C5470 (Performance
                 evaluation and testing); C5620 (Computer networks and
                 techniques); C6150N (Distributed systems software)",
  keywords =     "Aeroscience problems; Computational fluid dynamics;
                 Cycle recovery; Loosely coupled cluster; NAS; NAS
                 Parallel Benchmarks; OVERFLOW-PVM; Performance
                 evaluation; Primary system users; Prototype cluster;
                 Silicon Graphics; System management issues; Workstation
                 cluster",
  thesaurus =    "Distributed processing; Fluid dynamics; Message
                 passing; Performance evaluation; Physics computing;
                 Workstations",
}
@InProceedings{Cheng:1994:PDP,
  author =       "D. Cheng and R. Hood",
  title =        "A portable debugger for parallel and distributed
                 programs",
  crossref =     "IEEE:1994:PSW",
  pages =        "723--732",
  year =         "1994",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Comput. Sci. Corp., NASA Ames Res. Center, Moffett
                 Field, CA, USA",
  classification = "C5620L (Local area networks); C6110P (Parallel
                 programming); C6150G (Diagnostic, testing, debugging
                 and evaluating systems); C6150N (Distributed systems
                 software)",
  keywords =     "Client portability; Client server interaction; Client
                 server model; Debugger code; Distributed programs;
                 Message passing implementations; Message passing
                 library; Message passing programs; MPI programs;
                 Parallel programs; Portable debugger; Process
                 abstractions; PVM; Server components; Tool generated
                 code; User interface",
  thesaurus =    "Client-server systems; Message passing; Parallel
                 programming; Program debugging; Software portability",
}
@Misc{Choudhary:1994:LCR,
author = "Alok Choudhary and Ian Foster and Geoffrey Fox and Ken
Kennedy and Carl Kesselman and Charles Koelbel and Joel
Saltz and Marc Snir",
title = "Languages, Compilers, and Runtime Systems Support for
Parallel Input-Output",
year = "1994",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
Parallel/Parallel.io.bib",
note = "Scalable I/O Initiative Working Paper Number 3. On WWW
at \path=http://www.ccsf.caltech.edu/SIO/SIO.html=.",
URL = "http://www.ccsf.caltech.edu/SIO/SIO.html",
comment = "Language extensions to support parallel I/O. Compiler
optimizations. Runtime library to support the compiler
and interface with the native file system. Compiler
would develop a mapping of data to the processor
memories and to the disks, and then decide on I/O
schedules to move data around, overlap I/O with
computation, even move computation around to best fit
what is available in memory at a given time. It can
also help with checkpointing. Compiler should pass info
to the runtime system, which in turn may need to pass
info to the file system, to help with optimization. I/O
scheduling includes reordering accesses; they even go
so far as to propose doing seek optimization in the
runtime library. Support for collective I/O. Extension
of MPI to I/O, to take advantage of its support for
asynchrony, scatter-gather, {\em etc}. On the way, they
hope to work with the FS people to decide on the
functional requirements of the file system. See also
poole:sio-survey, bagrodia:sio-character,
bershad:sio-os.",
keywords = "parallel I/O; multiprocessor file system; pario bib",
}
@inproceedings{Clarke:1994:MMP,
  author = {L. Clarke and I. Glendinning and R. Hempel},
  title = {The {MPI Message Passing Interface Standard}},
  crossref = {Decker:1994:PEM},
  pages = {213--218},
  year = {1994},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Coelho:1994:EHC,
  author = {F. Coelho},
  title = {Experiments with {HPF} compilation for a network of
    workstations},
  crossref = {Gentzsch:1994:HPC},
  pages = {423--428},
  year = {1994},
  bibdate = {Sun Dec 22 10:20:45 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Centre de Recherche en Inf., Ecole des Mines de Paris,
    Fontainebleau, France},
  classification = {C6110P (Parallel programming); C6140D (High level
    languages); C6150C (Compilers, interpreters and other
    processors)},
  keywords = {Communication hardware; Data-parallel Fortran;
    Distributed memory multiprocessors; High Performance
    Fortran; HPF compilation; Optimizing compiler; PVM
    3-based generated code; Scalable performance;
    Workstation network},
  thesaurus = {FORTRAN; Optimising compilers; Parallel languages;
    Parallelising compilers},
}
@article{Cooper:1994:CHF,
  author = {M. D. Cooper and N. A. Burton and R. J. Hall and I. H.
    Hillier},
  title = {Combined {Hartree--Fock} and density functional
    theory: a distributed memory parallel implementation},
  journal = j-J-MOL-STRUCT-THEOCHEM,
  volume = {121},
  pages = {97--107},
  month = dec,
  year = {1994},
  coden = {THEODJ},
  issn = {0166-1280 (print), 1872-7999 (electronic)},
  issn-l = {0166-1280},
  bibdate = {Sun Dec 22 10:20:45 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Chem., Manchester Univ., UK},
  classification = {A3115 (General mathematical and computational
    developments for atoms and molecules); A3120J (Local
    density approximation (atoms and molecules)); C6110P
    (Parallel programming); C7320 (Physics and chemistry
    computing)},
  fjournal = {Journal of molecular structure. Theochem},
  keywords = {Ab initio code; Density functional theory; Direct SCF
    energy evaluation; Distributed memory parallel
    implementation; GAUSSIAN 92; Gradient evaluation;
    Hartree--Fock theory; Hewlett--Packard 9000-7xx series;
    Kohn--Sham density functional code; Parallel Virtual
    Machine; Parallelisation; Portable communications
    package; PRISM algorithm; PVM; Two-electron integrals;
    UNIX workstations; Workstation cluster},
  pubcountry = {Netherlands},
  thesaurus = {Ab initio calculations; Density functional theory;
    Distributed memory systems; HF calculations; Parallel
    algorithms; Parallel programming; Physics computing;
    SCF calculations},
}
@InProceedings{Cote:1994:PSA,
author = "J. C{\^o}t{\'e} and S. J. Thomas",
title = "Parallel Semi-{Lagrangian} Advection on the Sphere
Using {PVM}",
crossref = "Pierce:1994:PSH",
pages = "470--477",
year = "1994",
bibdate = "Mon Oct 26 07:49:42 1998",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Div. de Recherche en Prevision Numerique, Environment
Canada, Dorval, Que., Canada",
classification = "C1160 (Combinatorial mathematics); C4240P (Parallel
programming and algorithm theory); C4260 (Computational
geometry); C5220P (Parallel architecture); C5440
(Multiprocessor systems and techniques); C6150N
(Distributed systems)",
keywords = "Courant-Friedrichs-Lewy condition; Distributed MIMD
parallel algorithms; Eulerian methods; Intel iPSC/860;
Numerical methods; Parallel message-passing
implementation; Parallel performance; Parallel
semi-Lagrangian advection; Parallel virtual machine;
PVM; Semi-Lagrangian method; Shallow-water equations;
Sphere; Spherical geometry; Sub-grid dimensions",
thesaurus = "Computational geometry; Hypercube networks; Message
passing; Parallel algorithms; Parallel machines",
}
@InProceedings{Cote:1994:PSL,
author = "J. C{\^o}t{\'e} and S. J. Thomas",
title = "Parallel {semi-Lagrangian} advection on the sphere
using {PVM}",
crossref = "Dekker:1994:MPP",
pages = "801--808",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "A0260 (Numerical approximation and analysis); A0340G
(Fluid dynamics: general mathematical aspects); A4710
(General fluid dynamics theory, simulation and other
computational methods); C1160 (Combinatorial
mathematics); C4160 (Numerical integration and
differentiation); C4240P (Parallel programming and
algorithm theory); C4260 (Computational geometry);
C5220P (Parallel architecture); C5440 (Multiprocessing
systems); C5440 (Multiprocessor systems and
techniques); C6150N (Distributed systems software);
C6150N (Distributed systems); C7320 (Physics and
chemistry computing)",
corpsource = "Div. de Recherche en Prevision Numerique, Environment
Canada, Dorval, Que., Canada",
keywords = "algorithms; computational fluid dynamics;
computational geometry; Courant Friedrichs Lewy
condition; Courant-Friedrichs-Lewy condition;
dimensions; distributed memory systems; distributed
MIMD implementation; distributed MIMD parallel
algorithms; Eulerian methods; evaluation; fluid
dynamics; hypercube networks; integration; Intel
iPSC/860; Lagrangian method; message passing; methods;
numerical; parallel; parallel algorithms; parallel
machines; parallel message-passing implementation;
parallel semi-Lagrangian advection; passive advection;
performance; physics computing; problem; processor;
PVM; scalable code; semi-; shallow-water equations;
software performance; sphere; spherical geometry;
sub-grid; sub-grid dimensions; time steps; transport
processes; virtual machine",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Supercomput.
Appl",
treatment = "P Practical",
xxauthor = "S. J. Thomas and J. C{\^o}t{\'e}",
}
@InProceedings{Cownie:1994:PPP,
author = "J. Cownie and A. Dunlop and S. Hellberg and A. J. G.
Hey and D. Pritchard",
title = "Portable parallel programming environments---the
{ESPRIT PPPE} project",
crossref = "Dekker:1994:MPP",
pages = "135--142",
year = "1994",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Meiko Ltd., Bristol, UK",
classification = "C6110B (Software engineering techniques); C6110P
(Parallel programming); C6115 (Programming support)",
keywords = "ESPRIT PPPE project; European hardware manufacturers;
HPF mapper; Integrated tool environments; Large
distributed memory parallel computers; Large scale
scientific and engineering applications; Mainstream
applications development; Migration aids; Open system
standards; Parallel architectures; Parallel debugger;
PARMACS/MPI; PCTE; Performance monitor; Portable
parallel programming environments; Program debugger;
Run-time environment; Software houses",
thesaurus = "Parallel architectures; Parallel programming;
Programming environments; Research initiatives;
Software houses; Software portability; Standards",
}
@Article{daCunha:1994:PIR,
author = "Rudnei Dias {da Cunha} and Tim Hopkins",
title = "A parallel implementation of the restarted {GMRES}
iterative algorithm for nonsymmetric systems of linear
equations",
journal = j-ADV-COMPUT-MATH,
volume = "2",
number = "3",
pages = "261--277",
month = "????",
year = "1994",
CODEN = "ACMHEX",
ISSN = "1019-7168",
ISSN-L = "1019-7168",
bibdate = "Mon Oct 07 09:09:23 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "We describe the parallelisation of the GMRES$(c)$
algorithm and its implementation on distributed-memory
architectures, using both networks of transputers and
networks of workstations under the PVM message-passing
system. The test systems of linear equations considered
are those derived from five-point finite-difference
discretisations of partial differential equations. A
theoretical model of the computation and communication
phases is presented which allows us to decide for which
values of the parameter $c$ our implementation executes
efficiently. The results show that for reasonably large
discretisation grids the implementations are effective
on a large number of processors.",
acknowledgement = ack-nhfb,
affiliation = "Centro de Processamento de Dados, Univ. Federal do Rio
Grande do Sul, Brazil",
classification = "C4130 (Interpolation and function approximation);
C4170 (Differential equations); C4240P (Parallel
programming and algorithm theory); C5440
(Multiprocessor systems and techniques)",
fjournal = "Advances in computational mathematics",
keywords = "(65F10) Numerical analysis; (65Y05) Numerical
analysis; Communication phases; Computer aspects of
numerical algorithms; Distributed-memory architectures;
Five-point finite-difference discretisations; Iterative
methods for linear systems (See also 65N22); Networks
of transputers; Networks of workstations; Nonsymmetric
systems of linear equations; Numerical linear algebra;
Parallel computation; Parallel implementation; Partial
differential equations; PVM message-passing system;
Restarted GMRES iterative algorithm",
pubcountry = "Switzerland",
thesaurus = "Distributed memory systems; Finite difference methods;
Iterative methods; Message passing; Parallel
algorithms; Partial differential equations",
}
@inproceedings{Damodaran-Kamal:1994:MSR,
  author = {S. K. Damodaran-Kamal and J. M. Francioni},
  title = {mdb: a semantic race detection tool for {PVM}},
  crossref = {Pierce:1994:PSH},
  pages = {702--709},
  year = {1994},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Comput. Sci., Southwestern Louisiana Univ.,
    Lafayette, LA, USA},
  classification = {C6110P (Parallel programming); C6150G (Diagnostic,
    testing, debugging and evaluating systems)},
  corpsource = {Dept. of Comput. Sci., Southwestern Louisiana Univ.,
    Lafayette, LA, USA},
  keywords = {C; C programs; debugging tool; Debugging tool;
    detection; deterministic replay; Deterministic replay;
    erroneous executions; Erroneous executions; error;
    error detection; Error detection; expressions; Fortran
    programs; hazards and race conditions; mdb; Mdb;
    message; message passing parallel programs; Message
    passing parallel programs; nondeterminism;
    Nondeterminism; parallel programming; passing; program
    debugging; programs; PVM; receive operation; Receive
    operation; run-time detection; Run-time detection;
    semantic; Semantic expressions; semantic race detection
    tool; Semantic race detection tool; sequential debugger
    invocation; Sequential debugger invocation},
  sponsororg = {IEEE Comput. Soc. Tech. Committee on Supercomput.
    Appl},
  thesaurus = {Error detection; Hazards and race conditions; Message
    passing; Parallel programming; Program debugging},
  treatment = {P Practical},
}
@InProceedings{Damodaran-Kamal:1994:TRP,
author = "S. K. Damodaran-Kamal and J. M. Francioni",
title = "Testing races in parallel programs with an {OtOt}
strategy",
crossref = "Ostrand:1994:PIS",
journal = j-SIGSOFT,
pages = "216--227",
year = "1994",
CODEN = "SFENDP",
ISSN = "0163-5948",
ISSN-L = "0163-5948",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
acknowledgement = ack-nhfb,
affiliation = "Department of Comput. Sci., Southwestern Louisiana
Univ., Lafayette, LA, USA",
classification = "C4240P (Parallel programming and algorithm theory);
C6110P (Parallel programming); C6150G (Diagnostic,
testing, debugging and evaluating systems); C7430
(Computer engineering)",
fjournal = "ACM SIGSOFT Software Engineering Notes",
issue = "spec. issue. p. 216-27",
journal-URL = "https://dl.acm.org/citation.cfm?id=J728",
keywords = "Concurrent programs; Controlled execution; Debugging
tool; Exponential complexity; General-purpose run-time
testing technique; Mdb; Nondeterminism;
One-thread-at-one-time strategy; OtOt strategy;
Parallel Virtual Machine; Polynomial time complexity;
Race conditions specification; Race detection; Race
expressions; Race testing; Unrestricted message passing
parallel programs",
thesaurus = "Computational complexity; Hazards and race conditions;
Message passing; Parallel programming; Program
debugging; Program testing; Virtual machines",
}
@article{Dean:1994:CPV,
  author = {C. E. Dean and R. C. Denny and P. C. Stephenson and G.
    J. Milne and E. Pantos},
  title = {Computing with parallel virtual machines},
  journal = j-J-PHYS-IV-COLLOQUE,
  volume = {4},
  number = {C9},
  pages = {C9/445--448},
  month = nov,
  year = {1994},
  coden = {JPICEI},
  issn = {1155-4339},
  issn-l = {1155-4339},
  bibdate = {Sun Dec 22 10:20:45 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib},
  acknowledgement = ack-nhfb,
  affiliation = {SERC Daresbury Lab., Warrington, UK},
  classification = {C4240P (Parallel programming and algorithm theory);
    C6110P (Parallel programming); C7320 (Physics and
    chemistry computing)},
  fjournal = {Journal de physique. IV, Colloque},
  keywords = {64-Node Intel iPSC/860 hypercube; Computing elements;
    CPU performance; DALAI; LSQINT; Parallel execution;
    Parallel virtual machines; PATTERN; Processing time;
    PROJECT; Single program multiple data; Synchrotron
    radiation},
  thesaurus = {Parallel programming; Physics computing; Synchrotron
    radiation},
}
@Article{DeKeyser:1994:RTL,
author = "J. {De Keyser} and K. Lust and D. Roose",
title = "Run-time load balancing support for a parallel
multiblock {Euler\slash Navier--Stokes} code with
adaptive refinement on distributed memory computers",
journal = j-PARALLEL-COMPUTING,
volume = "20",
number = "8",
pages = "1069--1088",
month = aug,
year = "1994",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Katholieke Univ., Leuven,
Belgium",
classification = "A4710 (General fluid dynamics theory, simulation and
other computational methods); C4185 (Finite element
analysis); C4240P (Parallel programming and algorithm
theory); C6150N (Distributed systems software)",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
keywords = "Adaptive mesh refinement; Distributed memory
computers; Euler/Navier--Stokes code; Load balance;
Load balancing; Mesh refinement; Parallel algorithm;
Parallel performance; Parallel programming library",
pubcountry = "Netherlands",
thesaurus = "Distributed memory systems; Finite element analysis;
Fluid dynamics; Navier--Stokes equations; Parallel
algorithms; Resource allocation",
}
@InProceedings{DeRoeck:1994:CFP,
author = "Y. H. {De Roeck} and R. E. Plessix",
title = "Combining {F90} and {PVM} to Construct Synthetic
Seismograms by Ray-Tracing",
crossref = "IEEE:1994:OOE",
volume = "2",
pages = "II-653--II-658",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "IFREMER, Brest, France",
classification = "A9130R (Controlled source seismology); A9365 (Data
and information; acquisition, processing, storage and
dissemination in geophysics); A9385 (Instrumentation
and techniques for geophysical, hydrospheric and lower
atmosphere research); B6140C (Optical
information, image and video signal processing); B7710
(Geophysical techniques and equipment); C5260B
(Computer vision and image processing techniques);
C7340 (Geophysics computing)",
conflocation = "Brest, France; 13-16 Sept. 1994",
conftitle = "Proceedings of OCEANS'94",
corpsource = "IFREMER, Brest, France",
keywords = "Energy attenuation; energy attenuation; Explosion
seismology; explosion seismology; F90; geophysical
prospecting; geophysical signal processing; geophysical
techniques; inverse problems; Marine reflection seismic
record; marine reflection seismic record; Measurement
technique; measurement technique; Modell; modell;
Multiple echo; multiple echo; Oceanic crust; oceanic
crust; Parallel Virtual Machine; Parallelisation;
parallelisation; profiling; Prospecting; prospecting;
PVM; Ray-conversion; ray-conversion; Ray-tracing;
ray-tracing; Seafloor; seafloor; seismic reflection;
Seismic reflection profiling; seismology; Synthetic
seismogram; synthetic seismogram; Vectorisation;
vectorisation",
sponsororg = "Oceanic Eng. Soc. IEEE; Soc. Electr. Electron. France;
Communaute Urbaine de Brest",
thesaurus = "Geophysical prospecting; Geophysical signal
processing; Geophysical techniques; Inverse problems;
Seismology",
treatment = "P Practical; T Theoretical or Mathematical",
}
@Article{Deshpande:1994:ADN,
author = "Manish Deshpande and Jinzhang Feng and Charles L.
Merkle and Ashish Deshpande",
title = "Application of a Distributed Network in Computational
Fluid Dynamic Simulations",
journal = j-IJSA,
volume = "8",
number = "1",
pages = "64--67",
month = "Spring",
year = "1994",
CODEN = "IJSAE9",
ISSN = "0890-2720",
ISSN-L = "0890-2720",
bibdate = "Tue Feb 18 09:47:23 MST 1997",
bibsource = "Compendex database;
http://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover
library database",
abstract = "A general-purpose 3-D, incompressible Navier--Stokes
algorithm is implemented on a network of concurrently
operating workstations using PVM and compared with its
performance on a CRAY Y-MP and on an Intel iPSC\slash
860. The problem is relatively computationally
intensive, and has a communication structure based
primarily on nearest-neighbor communication, making it
ideally suited to message passing. Such problems are
frequently encountered in CFD, and their solution is
increasingly in demand. The communication structure is
explicitly coded in the implementation to fully exploit
the regularity in message passing in order to produce a
near-optimal solution. Results are presented for
various grid sizes using up to eight processors.",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Mech. Eng., Pennsylvania State Univ.",
affiliationaddress = "University Park, PA, USA",
classification = "631.1.1; 721.1; 722.4; 723.1; 723.2; 723.5",
fjournal = "International Journal of Supercomputer Applications",
journalabr = "Int J Supercomput Appl High Perform Comput",
keywords = "Algorithms; Communication structure; Computational
complexity; Computational fluid dynamic simulations;
Computer simulation; Computer workstations; Concurrent
operations; Data structures; Data transfer; Distributed
computer systems; Fluid dynamics; Incompressible
Navier--Stokes algorithm; Nearest neighbor
communication; Optimization; Three dimensional",
}
@inproceedings{Dikken:1994:DDL,
  author = {L. Dikken and F. van der Linden and J. Vesseur and P.
    Sloot},
  title = {{DynamicPVM}: {Dynamic} load balancing on parallel
    systems},
  crossref = {Gentzsch:1994:HPC},
  pages = {273--277},
  year = {1994},
  bibdate = {Sun Dec 22 10:20:45 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Shell Nederland Informatieverwerking, Rijswijk,
    Netherlands},
  classification = {C5440 (Multiprocessing systems); C6110P (Parallel
    programming); C6150J (Operating systems); C6150N
    (Distributed systems software)},
  keywords = {DynamicPVM; Load balancing; Loosely coupled
    processors; Migration; Multi tasking; Multiuser;
    Parallel systems; Parallel Virtual Machine; Process
    checkpointing; PVM; Restart mechanism; Scheduling},
  thesaurus = {Message passing; Parallel programming; Processor
    scheduling; Resource allocation; Virtual machines},
}
@InProceedings{Dykes:1994:CCP,
author = "S. G. Dykes and Xiaodong Zhang and Yan Zhou and Haixu
Yang",
title = "Communication and computation patterns of large scale
image convolutions on parallel architectures",
crossref = "Siegal:1994:PEI",
pages = "926--931",
year = "1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "High Performance Comput. and Software Lab., Texas
Univ., San Antonio, TX, USA",
classification = "C4240P (Parallel programming and algorithm theory);
C5220P (Parallel architecture); C5260B (Computer vision
and picture processing); C5440 (Multiprocessor systems
and techniques)",
keywords = "CM-5; Communication overhead; Computation patterns;
Convolution; Convolution calculations; Execution time;
Fast memory store; Image segmentation; Image processing
operations; IPSC/860; Large kernel convolutions; Large
scale image convolutions; Memory access demand;
Parallel algorithms; Parallel architectures; Processor
power; PVM distributed memory multicomputers; Texture
segmentation application",
thesaurus = "Distributed memory systems; Image segmentation; Image
texture; Parallel algorithms; Parallel machines",
}
@inproceedings{Elamvazuthi:1994:OPA,
  author = {C. Elamvazuthi and G. A. Manson},
  title = {{Occam}, {PVM} and the Alternative Construct},
  crossref = {Miles:1994:PTO},
  pages = {56--68},
  year = {1994},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Comput. Sci., Sheffield Univ., UK},
  classification = {C6110P (Parallel programming); C6115 (Programming
    support); C6140D (High level languages); C6150N
    (Distributed systems software)},
  corpsource = {Dept. of Comput. Sci., Sheffield Univ., UK},
  keywords = {alternation construct; Alternation construct; code;
    Code generation; code mapping; Code mapping;
    Communicating State Diagram; communication event;
    Communication event; computer aided software
    engineering; CSD; diagrammatic representation;
    Diagrammatic representation; diagrammatic technique;
    Diagrammatic technique; generation; Machine;
    methodology; Methodology; model; Model process
    behaviour; Occam; occam programming language; Occam
    programming language; parallel; parallel architecture;
    Parallel architecture; Parallel CASE tool; parallel
    processing; Parallel processing; Parallel system
    design; Parallel Virtual; Parallel Virtual Machine;
    process behaviour; programming; PVM; software tool;
    Software tool; software tools; system design; virtual
    machines},
  pubcountry = {Netherlands},
  thesaurus = {Computer aided software engineering; Occam; Parallel
    programming; Software tools; Virtual machines},
  treatment = {P Practical; T Theoretical or Mathematical},
}
@article{Eppstein:1994:CSP,
  author = {M. J. Eppstein and D. E. Dougherty},
  title = {A comparative study of {PVM} workstation cluster
    implementations of a two-phase subsurface flow model},
  journal = j-ADV-WATER-RESOURCES,
  volume = {17},
  number = {3},
  pages = {181--??},
  month = {????},
  year = {1994},
  coden = {AWREDI},
  issn = {0309-1708 (print), 1872-9657 (electronic)},
  issn-l = {0309-1708},
  bibdate = {Mon Jan 15 15:32:53 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Advances in Water Resources},
}
@article{Escaig:1994:PMD,
  author = {Y. Escaig and G. Touzot and M. Vayssade},
  title = {Parallelization of a multilevel domain decomposition
    method},
  journal = j-COMPUT-SYST-ENG,
  volume = {5},
  number = {3},
  pages = {253--263},
  month = jun,
  year = {1994},
  coden = {COSEEO},
  issn = {0956-0521},
  issn-l = {0956-0521},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {CRIHAN, Mont Saint Aignan, France},
  classification = {C4185 (Finite element analysis); C5220P (Parallel
    architecture); C5440 (Multiprocessing systems); C5470
    (Performance evaluation and testing); C7310
    (Mathematics computing)},
  fjournal = {Computing systems in engineering: an international
    journal},
  keywords = {CRAY Y-MP; Distributed memory machines; Distributed
    memory systems; Ethernet network; Finite element
    method; IBM RS/6000 workstations; Interface problem;
    MIMD; Multilevel domain decomposition method; Multiple
    instructions multiple data; Multiprocessor machines;
    Parallel Virtual Machine; Performance; PVM; Shared
    memory machine; Shared memory systems},
  pubcountry = {UK},
  thesaurus = {Cray computers; Distributed memory systems; Finite
    element analysis; IBM computers; Mathematics computing;
    Performance evaluation; Shared memory systems},
}
@Article{Ewing:1994:DCW,
author = "Richard E. Ewing and Robert C. Sharpley and Derek
Mitchum and P. O'Leary and J. S. Sochacki",
title = "Distributed Computation of Wave Propagation Models
Using {PVM}",
journal = j-IEEE-PAR-DIST-TECH,
volume = "2",
number = "1",
pages = "26--31",
month = "Spring",
year = "1994",
CODEN = "IPDTEX",
DOI = "https://doi.org/10.1109/88.281870",
ISSN = "1063-6552 (print), 1558-1861 (electronic)",
ISSN-L = "1063-6552",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover
library database",
abstract = "The Parallel Virtual Machine lets researchers create a
powerful, inexpensive parallel system on which they can
solve large, sophisticated problems such as simulating
the propagation of seismic waves.",
acknowledgement = ack-nhfb,
affiliation = "Texas A and M Univ., College Station, TX, USA",
classification = "A9130F (Surface and body waves); C5440
(Multiprocessor systems and techniques); C5620
(Computer networks and techniques); C6110P (Parallel
programming); C6150N (Distributed systems); C7340
(Geophysics)",
corpsource = "Texas A and M Univ., College Station, TX, USA",
fjournal = "IEEE parallel and distributed technology: systems and
applications",
keywords = "C; Communication networks; communication networks;
Computational power; computational power; Cost
effectiveness; cost effectiveness; Distributed
computation; distributed computation; Ethernet; Fiber;
fiber; Fortran; geophysics computing; Hypercubes;
hypercubes; machines; Meshes; meshes; network operating
systems; parallel; parallel programming; Parallel
Virtual Machine; PVM; Remote procedural libraries;
remote procedural libraries; Rings; rings; Seismic wave
propagation simulation; seismic wave propagation
simulation; seismic waves; Wave propagation models;
wave propagation models",
thesaurus = "Geophysics computing; Network operating systems;
Parallel machines; Parallel programming; Seismic
waves",
treatment = "P Practical",
}
@Article{Femminella:1994:PBP,
author = "A. Femminella and A. Omodeo",
title = "{PVM-based} parallel computing: a case study on power
plant simulation",
journal = j-MICROPROC-MICROPROG,
volume = "40",
number = "10--12",
pages = "875--878",
month = dec,
year = "1994",
CODEN = "MMICDT",
ISSN = "0165-6074 (print), 1878-7061 (electronic)",
ISSN-L = "0165-6074",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Centro Ricerca di Autom., ENEL, Milan, Italy",
classification = "B8110D (Power system planning and layout); B8200
(Generating stations and plants); C6110P (Parallel
programming); C6150N (Distributed systems software);
C7410B (Power engineering computing)",
conflocation = "Liverpool, UK; Sept. 1994",
conftitle = "20th Annual Euromicro Conference. System Architecture
and Integration",
corpsource = "Centro Ricerca di Autom., ENEL, Milan, Italy",
fjournal = "Microprocessing and Microprogramming",
keywords = "case study; Case study; digital simulation;
distributed programming; Distributed programming;
distributed software platform; Distributed software
platform; heterogeneous workstation; Heterogeneous
workstation network; independently evolving;
Independently evolving processes; machines; message
exchange; Message exchange; network; open systems;
parallel programming; Parallel Virtual Machine;
periodic synchronization; Periodic synchronization;
power plant; Power plant simulation; power plants;
power system analysis computing; processes; PVM-based
parallel computing; simulation; Transputer network;
virtual; weakly-coupled processes; Weakly-coupled
processes",
pubcountry = "Netherlands",
thesaurus = "Digital simulation; Open systems; Parallel
programming; Power plants; Power system analysis
computing; Virtual machines",
treatment = "P Practical",
}
@inproceedings{Fineberg:1994:IMM,
  author = {S. A. Fineberg},
  title = {Implementing multidisciplinary and multi-zonal
    applications using {MPI}},
  crossref = {IEEE:1994:FSF},
  pages = {496--503},
  year = {1994},
  bibdate = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Comput. Sci. Corp., NASA Ames Res. Center, Moffett
    Field, CA, USA},
  classification = {C6110P (Parallel programming); C6150N (Distributed
    systems software)},
  conftitle = {Proceedings Frontiers '95. The Fifth Symposium on the
    Frontiers of Massively Parallel Computation},
  corpsource = {Comput. Sci. Corp., NASA Ames Res. Center, Moffett
    Field, CA, USA},
  keywords = {codes; Codes; message passing; Message Passing
    Interface; multidisciplinary applications;
    Multidisciplinary applications; multizonal
    applications; Multizonal applications; parallel
    programming; parallel programs; Parallel programs;
    performance; Performance; point-to-point message
    passing routines; Point-to-point message passing
    routines; portable library; Portable library; single
    program multiple data stream; Single program multiple
    data stream; standard; Standard; standards},
  sponsororg = {IEEE Comput. Soc. Tech. Committee on Comput. Archit.;
    NASA; Univ. Maryland Inst. Adv. Comput. Studies; George
    Mason Univ},
  thesaurus = {Message passing; Parallel programming; Standards},
  treatment = {P Practical},
}
%%% Journal article by Flower and Kolawa on the Express message-passing
%%% system, volume 20, number 4, dated 31 April-macro 1994 as recorded.
%%% The journal name is supplied by the string macro j-PARALLEL-COMPUTING,
%%% which must be defined elsewhere in the file (not visible here).
%%% The bibsource and URL values contain raw ampersands; they are URL data
%%% and are kept verbatim.
@Article{Flower:1994:EJM,
author = "Jon Flower and Adam Kolawa",
title = "{Express} is not just a message passing system:
current and future directions in {Express}",
journal = j-PARALLEL-COMPUTING,
volume = "20",
number = "4",
pages = "597--614",
day = "31",
month = apr,
year = "1994",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Fri Aug 6 10:14:00 MDT 1999",
bibsource = "Compendex database;
http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1994&volume=20&issue=4;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1994&volume=20&issue=4&aid=860",
abstract = "We describe some of the features of Express and the
way that they were developed as a response to the needs
of application programmers. We show how currently
emerging computing platforms have led to new
application needs and show how these are satisfied with
Express features. We introduce a recently developed
programming style which greatly simplifies programming
as well as directly addressing complex issues such as
dynamic load balancing and fault tolerance. Finally, we
present a comparison of Express' features and
motivation to the Message Passing Interface (MPI)
standard currently being developed.",
acknowledgement = ack-nhfb,
affiliation = "ParaSoft Corp",
affiliationaddress = "Pasadena, CA, USA",
classification = "C5440 (Multiprocessor systems and techniques);
C6110P (Parallel programming); C6150N (Distributed
systems)",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
journalabr = "Parallel Comput",
keywords = "Application developers toolkit; Application needs;
Computer programming; Computing platforms; Dynamic load
balancing; Dynamics; Express; Express features; Fault
tolerance; Fault tolerance system; Message passing
Express; Message passing interface (MPI) standard;
Message passing programming style; Message passing
system; MPI standard",
pubcountry = "Netherlands",
thesaurus = "Message passing; Parallel programming; Resource
allocation",
}
%%% Minimal conference-paper record (Franke, Hochschild, Pattnaik, Snir);
%%% everything about the proceedings comes from the parent Decker:1994:PEM.
%%% NOTE(review): same four authors and a near-identical title to
%%% Franke:1994:MEI, but a different parent and page range -- presumably a
%%% separate publication; confirm it is not a duplicate.
@InProceedings{Franke:1994:EIM,
author = "H. Franke and P. Hochschild and P. Pattnaik and M.
Snir",
title = "An Efficient Implementation of {MPI}",
crossref = "Decker:1994:PEM",
pages = "219--230",
year = "1994",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
%%% Conference paper on MPI-F for the IBM SP1; parent proceedings entry is
%%% Agrawal:1994:PIC.  The pages value carries a "III-" prefix on both ends
%%% of the range, as recorded; there is no separate volume field.
@InProceedings{Franke:1994:MEI,
author = "H. Franke and P. Hochschild and P. Pattnaik and M.
Snir",
title = "{MPI-F}: An Efficient Implementation of {MPI} on
{IBM-SP1}",
crossref = "Agrawal:1994:PIC",
pages = "III-197--III-201",
year = "1994",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5220P (Parallel architecture); C5470 (Performance
evaluation and testing); C6150N (Distributed systems
software)",
conftitle = "Proceedings of 23rd Annual International Conference on
Parallel Processing",
corpsource = "IBM Thomas J. Watson Res. Center, Yorktown Heights,
NY, USA",
keywords = "distributed memory cluster; distributed memory
systems; IBM computers; IBM-SP1; message passing; MPI;
parallel architectures; performance; performance
evaluation",
sponsororg = "Pennsylvania State Univ",
treatment = "P Practical",
}
%%% Conference paper on the MPI-F prototype for the IBM SP1 (five authors,
%%% adding J.-P. Prost to the author list used by the other Franke entries).
%%% Parent proceedings entry is Dongarra:1994:PSW.
@InProceedings{Franke:1994:MMP,
author = "H. Franke and P. Hochschild and P. Pattnaik and J.-P.
Prost and M. Snir",
title = "{MPI-F}: an {MPI} Prototype Implementation on {IBM
SP1}",
crossref = "Dongarra:1994:PSW",
pages = "43--55",
year = "1994",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6150E (General utility programs); C6150N
(Distributed systems software); C6180 (User
interfaces)",
conftitle = "Proceedings of the Second Workshop on Environments and
Tools for Parallel Scientific Computing",
corpsource = "IBM Thomas J. Watson Res. Center, Yorktown Heights,
NY, USA",
keywords = "application program interfaces; distributed memory
systems; External User Interface; IBM 9076 Scalable
PowerPARALLEL 1 system; IBM computers; IBM SP1; message
passing; Message-Passing Interface; modifications;
MPI-F; native EUI library; parallel machines;
performance measurements; prototype implementation;
software libraries; software performance evaluation;
software prototyping; user interfaces",
treatment = "P Practical",
}
%%% Paper by Freeman and Bull recorded as an Article in the series named by
%%% the macro j-LECT-NOTES-COMP-SCI (fjournal gives the expanded name),
%%% volume 879.  The macro must be defined elsewhere in the file.
@Article{Freeman:1994:SMM,
author = "T. L. Freeman and J. M. Bull",
title = "Shared Memory and Message Passing Implementations of
Parallel Algorithms for Numerical Integration",
journal = j-LECT-NOTES-COMP-SCI,
volume = "879",
pages = "219--228",
year = "1994",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Sep 15 10:01:31 MDT 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs1994.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
keywords = "computing; heterogeneous network; PARA; parallel
scientific computing",
}
%%% Conference paper by Gajecki and Moscinski on a traveling-salesman
%%% algorithm for networked workstations; parent proceedings entry is
%%% Dongarra:1994:PSC.  The thesaurus term uses the spelling "Travelling"
%%% while title and keywords use "Traveling"; both are kept as recorded.
@InProceedings{Gajecki:1994:NAT,
author = "M. Gajecki and J. Moscinski",
title = "A new algorithm for the traveling salesman problem on
networked workstations",
crossref = "Dongarra:1994:PSC",
pages = "229--235",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Inst. of Comput. Sci., Akademia Gorniczo-Hutnicza,
Cracow, Poland",
classification = "C1160 (Combinatorial mathematics); C1180
(Optimisation techniques); C4240P (Parallel programming
and algorithm theory); C5620L (Local area networks);
C6150N (Distributed systems software)",
keywords = "Efficiency; Local optimization method; Networked
workstations; Parallel algorithm; PVM; SUN SPARCstation
IPX; Traveling salesman problem",
thesaurus = "Local area networks; Parallel algorithms; Travelling
salesman problems; Workstations",
}
%%% Conference paper by G. A. Geist on cluster computing; parent
%%% proceedings entry is Dongarra:1994:PSC (shared with other entries in
%%% this file, e.g. Gajecki:1994:NAT and Hakula:1994:FEM).
@InProceedings{Geist:1994:CCW,
author = "G. A. Geist",
title = "Cluster computing: the wave of the future?",
crossref = "Dongarra:1994:PSC",
pages = "236--246",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Oak Ridge Nat. Lab., TN, USA",
classification = "C0230 (Economic, social and political aspects of
computing); C5620L (Local area networks); C6150N
(Distributed systems software); C7300 (Natural sciences
computing)",
keywords = "Cluster computing; Distributed memory computer;
Heterogeneous network research project; Oak Ridge
National Laboratory; Parallel computers; Parallel
Virtual Machine; Portable robust software; PVM;
Research issues; Scientific problems; Serial computers;
Social issues; Software package; Standard; Tennessee
University; User defined computer collection; Vector
computers; Workstation clusters",
thesaurus = "Distributed memory systems; Local area networks;
Natural sciences computing; Social aspects of
automation; Software packages; Virtual machines;
Workstations",
}
%%% Book record for the PVM users' guide (six authors).  Publisher and
%%% address come from the macros pub-MIT and pub-MIT:adr, which must be
%%% defined elsewhere in the file.  Keyword items are separated by
%%% semicolons; stray trailing periods copied from catalogue subject
%%% headings have been removed so each item is a clean term.
@Book{Geist:1994:PPV,
author = "Al Geist and Adam Beguelin and Jack Dongarra and
Weicheng Jiang and Robert Manchek and Vaidyalingam S.
Sunderam",
title = "{PVM}: Parallel Virtual Machine: a Users' Guide and
Tutorial for Networked Parallel Computing",
publisher = pub-MIT,
address = pub-MIT:adr,
pages = "xvii + 279",
year = "1994",
ISBN = "0-262-57108-0 (paperback)",
ISBN-13 = "978-0-262-57108-1 (paperback)",
LCCN = "QA76.58 .P85 1994",
bibdate = "Thu Feb 29 17:35:15 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
price = "US\$27.50",
series = "Scientific and engineering computation",
URL = "http://www.mitpress.com/book-home.tcl?isbn=0262571080",
acknowledgement = ack-nhfb,
keywords = "Computer networks; Networks --- Parallel programming;
Parallel computers",
}
%%% Master's thesis record.  The system name "Parallel Virtual Machine" is
%%% brace-protected in the title, like the acronym PVM, so that styles which
%%% convert thesis titles to sentence case do not lowercase the proper name.
@MastersThesis{Grengbondai:1994:CPU,
author = "Jules Crephat Grengbondai",
title = "Concurrent processing under {Parallel Virtual Machine}
({PVM})",
type = "M.S. thesis",
school = "Department of Computer Science, Southern Illinois
University at Carbondale",
address = "Carbondale, IL, USA",
pages = "vi + 97",
year = "1994",
bibdate = "Mon Jan 15 18:16:53 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
%%% Conference paper by Gropp and Lusk on the MPI communication library;
%%% parent proceedings entry is IEEE:1994:PSP.  The classification field
%%% lists codes C5440 and C6150N twice each with differing labels; both
%%% variants are kept as recorded.
@InProceedings{Gropp:1994:MCL,
author = "W. Gropp and E. Lusk",
title = "The {MPI} communication library: its design and a
portable implementation",
crossref = "IEEE:1994:PSP",
pages = "160--165",
year = "1994",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
classification = "C5440 (Multiprocessing systems); C5440
(Multiprocessor systems and techniques); C6110P
(Parallel programming); C6150N (Distributed systems
software); C6150N (Distributed systems)",
conftitle = "Proceedings of Scalable Parallel Libraries
Conference",
corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
keywords = "advanced features; Advanced features; implementation
strategy; Implementation strategy; message passing; MPI
communication library; MPI standard; parallel
programming; portable implementation; Portable
implementation; software portability; standard
message-passing interface; Standard message-passing
interface; standards",
sponsororg = "Mississippi State Univ.; Nat. Sci. Found",
thesaurus = "Message passing; Parallel programming; Software
portability; Standards",
treatment = "P Practical",
}
%%% Conference paper by Gropp and Smith on portable numerical libraries
%%% (keywords mention the PETSc library); parent proceedings entry is
%%% IEEE:1994:PSP, the same parent used by Gropp:1994:MCL.
@InProceedings{Gropp:1994:SEP,
author = "W. Gropp and B. Smith",
title = "Scalable, extensible, and portable numerical
libraries",
crossref = "IEEE:1994:PSP",
pages = "87--93",
year = "1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
classification = "C6110B (Software engineering techniques); C6120
(File organisation); C6180 (User interfaces)",
keywords = "Aggressive data-structure-neutral implementation; Data
structures; Implementation language; Meta-communication
layer; Parallel communication technology; PETSc
library; Portable Extensible Tools for Scientific
computing; Portable numerical libraries; Software
portability; Software technology; User interfaces;
User-interface language",
thesaurus = "Data structures; Software portability; User
interfaces",
}
%%% Book record for "Using MPI" (Gropp, Lusk, Skjellum).  Publisher and
%%% address come from the macros pub-MIT and pub-MIT:adr, which must be
%%% defined elsewhere in the file.  The LCCN is written with the period
%%% before the Cutter number, matching the form used by Geist:1994:PPV, and
%%% stray trailing periods have been removed from the keyword items.
@Book{Gropp:1994:UMP,
author = "William Gropp and Ewing Lusk and Anthony Skjellum",
title = "Using {MPI}: Portable Parallel Programming with the
Message-Passing Interface",
publisher = pub-MIT,
address = pub-MIT:adr,
pages = "xx + 307",
year = "1994",
ISBN = "0-262-57104-8",
ISBN-13 = "978-0-262-57104-3",
LCCN = "QA76.642 .G76 1994",
bibdate = "Thu Feb 29 17:35:09 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
price = "US\$24.95",
series = "Scientific and engineering computation",
URL = "http://www.mitpress.com/book-home.tcl?isbn=0262571048",
acknowledgement = ack-nhfb,
keywords = "Computer interfaces; Parallel computers ---
Programming; Parallel programming; Parallel
programming (Computer science)",
}
%%% Journal article by Gupta and Banerjee on compile-time estimation of
%%% communication costs.  The journal name comes from the macro
%%% j-J-PROGRAM-LANG (fjournal gives the expanded name).  ISSN-L is given
%%% alongside ISSN, as in the other Article entries of this file; only one
%%% ISSN is recorded, so the linking ISSN repeats it.
%%% NOTE(review): confirm the ISSN-L value against the ISSN registry.
@Article{Gupta:1994:CTE,
author = "M. Gupta and P. Banerjee",
title = "Compile-time estimation of communication costs of
programs",
journal = j-J-PROGRAM-LANG,
volume = "2",
number = "3",
pages = "191--225",
month = sep,
year = "1994",
CODEN = "JPLAER",
ISSN = "0963-9306",
ISSN-L = "0963-9306",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "IBM Thomas J. Watson Res. Center, Yorktown Heights,
NY, USA",
classification = "C6130 (Data handling techniques); C6150C (Compilers,
interpreters and other processors); C6150G (Diagnostic,
testing, debugging and evaluating systems)",
fjournal = "Journal of Programming Languages",
keywords = "Array references; Communication optimizations;
Compile-time estimation; Compiler; Data distribution;
Data movement; Data partitioning decisions; Distributed
memory machines; Fortran programs; Global address
space; High-level communication primitives; Loops;
Paradigm compiler; Processors; Program analysis;
Program communication costs; Ptran-II High-Performance
Fortran prototype compiler; Traversal properties",
pubcountry = "UK",
thesaurus = "Data handling; Distributed memory systems; Optimising
compilers; System monitoring",
}
%%% Minimal conference-paper record (Haeuser, Spel, Muylaert, Williams) on
%%% the ParNSS Navier--Stokes solver with PVM; all proceedings data comes
%%% from the parent Wagner:1994:CFD.  Proper names and product names in the
%%% title are brace-protected against case changes.
@InProceedings{Haeuser:1994:RNS,
author = "J. Haeuser and M. Spel and J. Muylaert and R. D.
Williams",
title = "Results for the {Navier--Stokes} Solver {ParNSS} on
Workstation Clusters and {IBM SP1} Using {PVM}",
crossref = "Wagner:1994:CFD",
pages = "432--442",
year = "1994",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
%%% Conference paper (Hakula, Malinen, Kallberg, Valve) on a finite element
%%% solution of the exterior Helmholtz problem on the IBM SP-1; parent
%%% proceedings entry is Dongarra:1994:PSC.
@InProceedings{Hakula:1994:FEM,
author = "H. Hakula and J. Malinen and P. Kallberg and P.
Valve",
title = "The finite element method applied to the exterior
{Helmholtz} problem on the {IBM SP-1}",
crossref = "Dongarra:1994:PSC",
pages = "262--269",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Helsinki Univ. of Technol., Espoo, Finland",
classification = "A0230 (Function theory, analysis); A0260 (Numerical
approximation and analysis); A4110H (Electromagnetic
waves: theory); B0290P (Differential equations); B0290T
(Finite element analysis); B5210 (Electromagnetic wave
propagation); C4170 (Differential equations); C4185
(Finite element analysis); C6110P (Parallel
programming); C6150N (Distributed systems software);
C7320 (Physics and chemistry computing)",
keywords = "2D domains; Complex linear equations; Electromagnetic
waves; Exterior Helmholtz problem; Finite element
method; Helmholtz equation; IBM SP-1 machine; Irregular
meshes; Monitoring facilities; Numerical analysis
research; Parallel implementation; Parallel Virtual
Machine environment; Performance; Quasi-minimal
residual method; Regular meshes; Scattering problem;
Single program multiple data model",
thesaurus = "Electromagnetic wave scattering; Finite element
analysis; Helmholtz equations; IBM computers; Parallel
machines; Parallel programming; Physics computing;
Software performance evaluation",
}
%%% Technical report CMU-CS-94-200 by Jonathan C. Hardwick on porting the
%%% CVL vector library to MPI.  Institution and address come from the
%%% macros inst-SCS-CMU and inst-SCS-CMU:adr defined at the head of the
%%% file.  The abstract's machine name is written "IBM SP-1"; a stray space
%%% left after the hyphen by an earlier line break has been removed.
@TechReport{Hardwick:1994:PVL,
author = "Jonathan C. Hardwick",
title = "Porting a vector library: a comparison of {MPI},
{Paris}, {CMMD} and {PVM} (or, ``{I'll} never have to
port {CVL} again'')",
type = "Research paper",
number = "CMU-CS-94-200",
institution = inst-SCS-CMU,
address = inst-SCS-CMU:adr,
pages = "16",
year = "1994",
bibdate = "Mon Jan 15 15:32:53 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "This paper describes the design and implementation in
MPI of the parallel vector library CVL, which is used
as the basis for implementing nested data-parallel
languages such as NESL and Proteus. We compare the ease
of writing and debugging the portable MPI
implementation of CVL with our experiences writing
previous versions in CM-2 Paris, CM-5 CMMD, and PVM,
and give initial performance results for MPI CVL
running on an IBM SP-1, Intel Paragon, and TMC CM-5.",
acknowledgement = ack-nhfb,
annote = "An earlier version of this paper appeared in
`Proceedings of the 2nd Scalable Parallel Libraries
Conference', Mississippi State University, Mississippi,
October 1994. November 1994. Supported in part by the
Wright Laboratory, Aeronautical Systems Center, Air
Force Materiel Command, USAF, and the Advanced Research
Projects Agency (ARPA). Supported in part by the
Pittsburgh Supercomputing Center. Supported in part by
the National Center for Supercomputing Applications.
Supported in part by the Argonne National Laboratory.",
keywords = "Parallel programming (Computer science)",
}
%%% Journal article by Hellberg and Zaluska on a PCTE-based portable
%%% parallel programming environment.  The journal name comes from the
%%% macro j-INFO-SOFTWARE-TECH (fjournal gives the expanded name), which
%%% must be defined elsewhere in the file.
@Article{Hellberg:1994:PPP,
author = "S. A. Hellberg and E. Zaluska",
title = "A portable parallel programming environment based
around {PCTE}",
journal = j-INFO-SOFTWARE-TECH,
volume = "36",
number = "7",
pages = "419--425",
month = jul,
year = "1994",
CODEN = "ISOTE7",
ISSN = "0950-5849 (print), 1873-6025 (electronic)",
ISSN-L = "0950-5849",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Electron. and Comput. Sci., Southampton Univ., UK",
classification = "C6110P (Parallel programming); C6115 (Programming
support); C6150N (Distributed systems software)",
fjournal = "Information and Software Technology",
keywords = "Distributed-memory; End-user applications; High
Performance Fortran; High-Performance Computing
Community programming standards;
Massively-parallel-processor; Message-Passing
Interface; PCTE; PCTE-based integrated toolset;
Performance; Portable common tool environment; Portable
parallel programming environment",
pubcountry = "UK",
thesaurus = "Message passing; Parallel programming; Programming
environments",
}
%%% Conference paper by R. Hempel on the MPI standard; parent proceedings
%%% entry is Gentzsch:1994:HPC.  Only the acronym MPI is brace-protected in
%%% the title, so bibliography styles remain free to apply their own casing
%%% to the remaining words.
@InProceedings{Hempel:1994:MSM,
author = "R. Hempel",
title = "The {MPI} Standard for Message Passing",
crossref = "Gentzsch:1994:HPC",
pages = "247--252",
year = "1994",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "German Nat. Res. Center for Comput. Sci., St.
Augustin, Germany",
classification = "C5220P (Parallel architecture); C6110P (Parallel
programming)",
conftitle = "High-Performance Computing and Networking
International Conference. Proceedings, Volume II:
Networking and Tools",
corpsource = "German Nat. Res. Center for Comput. Sci., St.
Augustin, Germany",
keywords = "message passing; Message passing; message-passing
interfaces; Message-passing interfaces; MPI standard;
parallel computing; Parallel computing; parallel
programming; standard; Standard; standards",
thesaurus = "Message passing; Parallel programming; Standards",
treatment = "P Practical",
}
%%% Journal article by Henriksen and Keunings on parallel computation of
%%% viscoelastic flow on a heterogeneous workstation network.  The journal
%%% name comes from the macro j-INT-J-NUMER-METHODS-FLUIDS (fjournal gives
%%% the expanded name), which must be defined elsewhere in the file.
@Article{Henriksen:1994:PCF,
author = "P. Henriksen and R. Keunings",
title = "Parallel computation of the flow of integral
viscoelastic fluids on a heterogeneous network of
workstations",
journal = j-INT-J-NUMER-METHODS-FLUIDS,
volume = "18",
number = "12",
pages = "1167--1183",
month = jun,
year = "1994",
CODEN = "IJNFDW",
ISSN = "0271-2091",
ISSN-L = "0271-2091",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Univ. Catholique de Louvain, Belgium",
classification = "A4750 (Non-Newtonian dynamics); C4240P (Parallel
programming and algorithm theory); C7320 (Physics and
Chemistry)",
fjournal = "International Journal for Numerical Methods in
Fluids",
keywords = "Computational mechanics problems; Compute-intensive
treatment; Deformation; Dynamic allocation; Fibre
suspension flow; Flow; Heterogeneous network of
workstations; Integral viscoelastic fluids; Internal
variables; Load balancing; Parallel algorithms;
Parallel computation; Parallel efficiency; POLYFLOW
package; Public domain PVM software library; Static
allocation; Viscoplastic solids",
pubcountry = "UK",
thesaurus = "Flow simulation; Non-Newtonian flow; Parallel
algorithms; Physics computing",
}
%%% Conference paper (Hiranandani, Kennedy, Mellor-Crummey, Sethi) on
%%% compiling block-cyclic distributions; parent proceedings entry is
%%% ACM:1994:CPI.
@InProceedings{Hiranandani:1994:CTB,
author = "S. Hiranandani and K. Kennedy and J. Mellor-Crummey
and A. Sethi",
title = "Compilation techniques for block-cyclic
distributions",
crossref = "ACM:1994:CPI",
pages = "392--403",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Rice Univ., Houston, TX, USA",
classification = "C6110P (Parallel programming); C6140D (High level
languages); C6150C (Compilers, interpreters and other
processors)",
keywords = "Block-cyclic distributions; Code; Compilers; Data
alignment; Data-parallel languages; Fortran D;
High-Performance Fortran; Linear-time algorithm; Memory
access sequence; MIMD distributed-memory machines;
Nonunit strides; Symbolic array dimensions; Symbolic
loop bounds",
thesaurus = "FORTRAN; Parallel languages; Program compilers",
}
%%% Conference paper (Issman, Degrez, De Keyser) on a multiblock
%%% Euler/Navier--Stokes solver using PVM; parent proceedings entry is
%%% Gentzsch:1994:HPC, with the volume given in its own field.  The
%%% surname "De Keyser" is braced so it parses as a single last name.
%%% The place name in corpsource is spelled "Rhode-St.-Genese", identical
%%% to the affiliation field (a stray space after the hyphen was removed).
%%% The keywords field contains split fragments such as "automatic load-"
%%% and "balancing"; they are kept as recorded.
@InProceedings{Issman:1994:PME,
author = "E. Issman and G. Degrez and J. {De Keyser}",
title = "A Parallel Multiblock {Euler\slash Navier--Stokes}
Solver on a Cluster of Workstations Using {PVM}",
crossref = "Gentzsch:1994:HPC",
volume = "1",
pages = "157--162",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "CFD Group, Von Karman Inst. for Fluid Dynamics,
Rhode-St.-Genese, Belgium",
classification = "A4710 (General fluid dynamics theory, simulation and
other computational methods); C5440 (Multiprocessing
systems); C6110P (Parallel programming); C6150J
(Operating systems); C6150N (Distributed systems
software); C7320 (Physics and chemistry computing)",
corpsource = "CFD Group, Von Karman Inst. for Fluid Dynamics,
Rhode-St.-Genese, Belgium",
keywords = "adaptive 2D-multiblock Euler/Navier--Stokes; Adaptive
2D-multiblock Euler/Navier--Stokes solver; automatic
load-; Automatic load-balancing; balancing; Block
distribution; block distribution; cluster; environment;
LOGO software library; Navier--Stokes equations;
parallel; Parallel computer; parallel computer;
parallel machines; Parallel multiblock
Euler/Navier--Stokes solver; parallel multiblock
Euler/Navier--Stokes solver; Parallel Virtual Machine
communication software; Parallelised; parallelised;
physics computing; Processors; processors; programming;
PVM; resource allocation; Run-time; run-time; solver;
Solver porting; solver porting; Unix; Unix workstation
cluster; Unix workstation cluster environment;
workstation; Workstation cluster",
pubcountry = "Germany",
thesaurus = "Navier--Stokes equations; Parallel machines; Parallel
programming; Physics computing; Resource allocation;
Unix",
treatment = "P Practical",
}
%%% Journal article by Iwashita and Murakami on KU PVM3/AP1000; the
%%% language field records the article as Japanese.  The journal name
%%% comes from the macro j-ENG-SCI-REP-KYUSHU (fjournal gives the expanded
%%% name).  ISSN-L is given alongside ISSN, as in the other Article entries
%%% of this file; only one ISSN is recorded, so the linking ISSN repeats it.
%%% NOTE(review): confirm the ISSN-L value against the ISSN registry.
@Article{Iwashita:1994:IPE,
author = "S. Iwashita and K. Murakami",
title = "Implementation and performances evaluation of {KU
PVM3\slash AP1000}",
journal = j-ENG-SCI-REP-KYUSHU,
volume = "16",
number = "3",
pages = "345--352",
month = dec,
year = "1994",
CODEN = "SRKHEK",
ISSN = "0388-1717",
ISSN-L = "0388-1717",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5440 (Multiprocessing systems); C5470 (Performance
evaluation and testing); C6110P (Parallel programming);
C6150N (Distributed systems software)",
fjournal = "Engineering Sciences Reports, Kyushu University",
keywords = "Basic communication intensive benchmarks; KU
PVM3/AP1000; Network configurations; Parallel computer;
Parallel programming library; Performance evaluation;
Virtual workstation cluster; Workstation clusters",
language = "Japanese",
pubcountry = "Japan",
thesaurus = "Parallel machines; Parallel programming; Performance
evaluation; Software libraries; Software performance
evaluation",
}
%%% Conference paper by A. Joubert on parallel algorithms for network
%%% equations; parent proceedings entry is Joubert:1994:PCT (a different
%%% key that happens to share the "Joubert:1994" prefix).
@InProceedings{Joubert:1994:PAL,
author = "A. Joubert",
title = "Parallel algorithms for linear and nonlinear equations
derived from networks",
crossref = "Joubert:1994:PCT",
pages = "145--152",
year = "1994",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "London Parallel Applications Centre, Queen Mary and
Westfield Coll., UK",
classification = "C4140 (Linear algebra); C4150 (Nonlinear and
functional equations); C4240P (Parallel programming and
algorithm theory)",
keywords = "Linear equations; Load flow; Nonlinear equations;
Power systems",
thesaurus = "Graph theory; Linear algebra; Network analysis;
Nonlinear equations; Parallel algorithms",
}
%%% Conference paper (Judd, Ratha, McKinley, Weng, Jain) on vision
%%% algorithms for workstation clusters; parent proceedings entry is
%%% IEEE:1994:PIF.  The proceedings volume is stored in the volume field
%%% and pages holds only the page range, the same layout as
%%% Issman:1994:PME, so that tools and styles can parse both values.
@InProceedings{Judd:1994:PIV,
author = "D. Judd and N. K. Ratha and P. K. McKinley and J. Weng
and A. K. Jain",
title = "Parallel implementation of vision algorithms on
workstation clusters",
crossref = "IEEE:1994:PIF",
volume = "3",
pages = "317--321",
year = "1994",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Michigan State Univ., East
Lansing, MI, USA",
classification = "B6140C (Optical information, image and video signal
processing); C1220 (Simulation, modelling and
identification); C5260B (Computer vision and image
processing techniques); C6110P (Parallel programming)",
keywords = "Distributed cluster platforms; Motion parameter
estimation algorithm; Sequential CLUSTER program;
Square-error data clustering method; Vision algorithms;
Workstation clusters",
thesaurus = "Computer vision; Parallel algorithms; Parameter
estimation",
}
%%% Article by Karamcheti and Chien on software overhead in messaging
%%% layers, volume 29, number 11; the conftitle and conflocation fields
%%% record the ASPLOS-VI conference.  The journal name comes from the macro
%%% j-SIGPLAN (fjournal gives the expanded name), which must be defined
%%% elsewhere in the file.  Numeric ranges in the abstract and in
%%% conflocation use a double hyphen, like the pages field.
@Article{Karamcheti:1994:SOM,
author = "Vijay Karamcheti and Andrew A. Chien",
title = "Software overhead in messaging layers: where does the
time go?",
journal = j-SIGPLAN,
volume = "29",
number = "11",
pages = "51--60",
month = nov,
year = "1994",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sat May 1 15:50:17 MDT 1999",
bibsource = "http://www.acm.org/pubs/toc/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.acm.org:80/pubs/citations/proceedings/asplos/195473/p51-karamcheti/",
abstract = "Despite improvements in network interfaces and
software messaging layers, software communication
overhead still dominates the hardware routing cost in
most systems. In this study, we identify the sources of
this overhead by analyzing software costs of typical
communication protocols built atop the active messages
layer on the CM-5. We show that up to 50--70\% of the
software messaging costs are a direct consequence of
the gap between specific network features such as
arbitrary delivery order, finite buffering, and limited
fault-handling, and the user communication requirements
of in-order delivery, end-to-end flow control, and
reliable transmission. However, virtually all of these
costs can be eliminated if routing networks provide
higher-level services such as in-order delivery,
end-to-end flow control, and packet-level
fault-tolerance. We conclude that significant cost
reductions require changing the constraints on
messaging layers: we propose designing networks and
network interfaces which simplify or replace software
for implementing user communication requirements.",
acknowledgement = ack-nhfb,
classification = "B6150M (Protocols); B6210L (Computer
communications); C5440 (Multiprocessing systems);
C5610N (Network interfaces); C5640 (Protocols); C6150N
(Distributed systems software)",
conflocation = "San Jose, CA, USA; 4--7 Oct. 1994",
conftitle = "Sixth International Conference on Architectural
Support for Programming Languages and Operating Systems
(ASPLOS-VI)",
corpsource = "Dept. of Comput. Sci., Illinois Univ., Urbana, IL,
USA",
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "active messages layer; arbitrary delivery order finite
buffering; CM-5; communication protocols; cost
reductions; cost reductions packet level fault
tolerance; design; end-to-end flow control; hardware
routing cost; in-order delivery; limited fault
handling; measurement; message passing; messaging
layers; network features; network interfaces; packet
level fault tolerance; parallel machines; performance;
protocols; reliable transmission; software
communications overhead; software messaging costs;
software messaging layers; software overhead;
standardization; theory; user communication
requirements",
sponsororg = "ACM; IEEE Comput. Soc",
subject = "{\bf D.4.4} Software, OPERATING SYSTEMS,
Communications Management, Message sending. {\bf C.2.2}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Network Protocols. {\bf C.4} Computer Systems
Organization, PERFORMANCE OF SYSTEMS. {\bf C.2.1}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Network Architecture and Design. {\bf C.1.2}
Computer Systems Organization, PROCESSOR ARCHITECTURES,
Multiple Data Stream Architectures (Multiprocessors),
Parallel processors**.",
treatment = "P Practical",
}
%%% Conference paper by Karrels and Lusk on performance analysis of MPI
%%% programs; parent proceedings entry is Dongarra:1994:PSW, the same
%%% parent used by Franke:1994:MMP.
@InProceedings{Karrels:1994:PAM,
author = "E. Karrels and E. Lusk",
title = "Performance Analysis of {MPI} Programs",
crossref = "Dongarra:1994:PSW",
pages = "195--200",
year = "1994",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6115 (Programming support); C6150E (General utility
programs); C6150G (Diagnostic, testing, debugging and
evaluating systems); C6150N (Distributed systems
software)",
conftitle = "Proceedings of the Second Workshop on Environments and
Tools for Parallel Scientific Computing",
corpsource = "Dept. of Comput. Sci., Wisconsin Univ., Oshkosh, WI,
USA",
keywords = "application program interfaces; functions library;
message passing; Message Passing Interface; MPI
programs; parallel computation; parallel programming;
performance analysis; portable publicly available
implementation; profiling interface; profiling
libraries; profiling tools; software libraries;
software performance evaluation; specification",
treatment = "P Practical",
}
%%% Conference paper (Knies, Barriuso, Harrod, Adams) on the SLICC
%%% collective-communication interface; parent proceedings entry is
%%% IEEE:1994:PSW.  The last author is written as a braced "Adams, III" so
%%% the comma is not treated as a name-part separator.
%%% NOTE(review): the keyword "Directly memory access" may be a slip for
%%% "Direct memory access" -- confirm against the indexing source.
@InProceedings{Knies:1994:SLL,
author = "A. D. Knies and F. R. Barriuso and W. J. Harrod and G.
B. {Adams, III}",
title = "{SLICC}: a low latency interface for collective
communications",
crossref = "IEEE:1994:PSW",
pages = "89--96",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Purdue Univ., West Lafayette, IN, USA",
classification = "C5220P (Parallel architecture); C6150N (Distributed
systems software)",
keywords = "Cray T3D; Directly memory access; Interprocessor
communications; Low latency interface; Low-level
collective communications interface; Parallel
computers; Performance results; PVM; Referenced
processing element; Shared address-space library
interface; Shared distributed memory systems; SLICC;
Software models",
thesaurus = "Application program interfaces; Cray computers;
Distributed memory systems; Message passing; Shared
memory systems; Software libraries; Software
performance evaluation",
}
%%% Conference paper on a user-level process package for PVM (UPVM);
%%% parent proceedings entry is Pierce:1994:PSH.
%%% NOTE(review): Konuru:1994:UPP in this file has the same authors, title
%%% (apart from capitalization), parent and page range, so the two records
%%% appear to describe one paper.  Both keys are kept because documents may
%%% cite either; consider consolidating.
@InProceedings{Konuru:1994:ULP,
author = "R. Konuru and J. Casas and R. Prouty and S. Otto and
J. Walpole",
title = "A user-level process package for {PVM}",
crossref = "Pierce:1994:PSH",
pages = "48--55",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C4240P (Parallel programming and algorithm theory);
C5440 (Multiprocessor systems and techniques); C6110P
(Parallel programming); C6150J (Operating systems);
C6150N (Distributed systems)",
corpsource = "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst.
of Sci. and Technol., Beaverton, OR, USA",
keywords = "dynamic load balancing; lightweight; message passing;
message-based; operating systems (computers); parallel;
parallel programming; parallel programs; performance
evaluation; processor; programming; PVM; resource
allocation; source-code compatible PVM interface;
SPMD-style PVM applications; standard PVM; UPVM;
user-level process package; virtual processors;
virtualization",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Supercomput.
Appl",
treatment = "T Theoretical or Mathematical",
}
@InProceedings{Konuru:1994:UPP,
author = "R. Konuru and J. Casas and R. Prouty and S. Otto and
J. Walpole",
title = "A User-Level Process Package for {PVM}",
crossref = "Pierce:1994:PSH",
pages = "48--55",
year = "1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst.
of Sci. and Technol., Beaverton, OR, USA",
classification = "C4240P (Parallel programming and algorithm theory);
C5440 (Multiprocessor systems and techniques); C6110P
(Parallel programming); C6150J (Operating systems);
C6150N (Distributed systems)",
keywords = "Dynamic load balancing; Lightweight virtual
processors; Message-based parallel programs; Parallel
programming; Processor virtualization; PVM; Source-code
compatible PVM interface; SPMD-style PVM applications;
Standard PVM; UPVM; User-level process package; Virtual
processors",
thesaurus = "Message passing; Operating systems [computers];
Parallel programming; Performance evaluation; Resource
allocation",
xxnote = "Check author order.",
}
@InProceedings{Kramer-Fuhrmann:1994:TGP,
author = "O. Kramer-Fuhrmann and L. Schafers and C. Scheidler",
title = "{TRAPPER} --- a graphical programming environment for
parallel systems",
crossref = "Becks:1994:NCT",
pages = "3--15",
year = "1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "German Nat. Res. Center for Comput. Sci., St.
Augustin, Germany",
classification = "C5440 (Multiprocessing systems); C6115 (Programming
support); C6130B (Graphics techniques); C6180G
(Graphical user interfaces)",
keywords = "Communicating sequential processes; Configtool;
Designtool; Graphical programming environment;
Graphical representation; Hybrid program development;
Machine independent message passing interfaces;
Parallel applications; Parallel Macros; Parallel
systems; Parallel Virtual Machine; PARMACS; Perftool;
Programming model; PVM; Sequential behavior; TRAPPER;
Vistool; Visualization",
thesaurus = "Communicating sequential processes; Graphical user
interfaces; Parallel processing; Programming
environments",
}
@Article{Lazar:1994:SRE,
author = "A. A. Lazar and K. H. Tseng and Koon Seng Lim and W.
Choe",
title = "A scalable and reusable emulator for evaluating the
performance of {SS7} networks",
journal = j-IEEE-J-SEL-AREAS-COMMUN,
volume = "12",
number = "3",
pages = "395--404",
month = apr,
year = "1994",
CODEN = "ISACEM",
DOI = "https://doi.org/10.1109/49.285300",
ISSN = "0733-8716 (print), 1558-0008 (electronic)",
ISSN-L = "0733-8716",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Electr. Eng., Columbia Univ., New York, NY,
USA",
classification = "B6150C (Switching theory); B6210 (Telecommunication
applications); C5620 (Computer networks and
techniques); C5670 (Network performance); C7410F
(Communications)",
fjournal = "IEEE Journal on Selected Areas in Communications",
keywords = "ATM LAN; Emulator design; Engineering workstations;
Fault conditions; OSI Management Information Service
platform; OSIMIS; Parallel Virtual Machine; Performance
evaluation; Public domain software; Reusable emulator;
Scalable emulator; Singapore; SS7 networks; Unbalanced
loading conditions",
thesaurus = "Asynchronous transfer mode; Open systems; Performance
evaluation; Public domain software; Telecommunication
signalling; Telecommunications computing",
}
@TechReport{Lehman:1994:IZP,
author = "Li-wei Lehman",
title = "Integrating {Zipcode} and {PVM}: towards a
higher-level message-passing environment",
type = "Technical report",
number = "MSSU-EIRS-ERC 94-2",
institution = "Engineering Research Center for Computational Field
Simulation, " # inst-MSU,
address = inst-MSU:adr,
pages = "7",
year = "1994",
bibdate = "Mon Jan 15 15:32:53 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
keywords = "PVM (Computer program); Telecommunications --- Message
processing.; Zipcode (Computer program)",
}
@InProceedings{Lin:1994:DNC,
author = "Mengjou Lin and Jehwei Hsieh and D. H. C. Du and J. P.
Thomas and J. A. MacDonald",
title = "Distributed network computing over local {ATM}
networks",
crossref = "IEEE:1994:PSW",
pages = "154--163",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Minnesota Univ., Duluth, MN,
USA",
classification = "C5620L (Local area networks); C6150E (General
utility programs); C6150J (Operating systems); C6150N
(Distributed systems software)",
keywords = "Application programming interfaces; ASX-100 ATM
Switch; Asynchronous transfer mode; BSD socket
programming interface; Communication protocol layer;
Distributed network computing; Distributed programming;
End-to-end communication; Fore Systems ATM API;
High-speed network standards; Local ATM networks;
Message passing library; Parallel matrix
multiplication; Parallel Virtual Machine; Performance
characteristics; Remote Procedure Call; Switch-based
high-speed local area networks; Workstations",
thesaurus = "Application program interfaces; Asynchronous transfer
mode; Local area networks; Matrix multiplication;
Message passing; Telecommunication standards",
}
@InProceedings{Loh:1994:ISR,
author = "B. C. Loh and G. A. Manson",
title = "Incorporating software reuse into the {PCSC}
methodology",
crossref = "deGloria:1994:TAS",
pages = "929--941",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Sheffield Univ., UK",
classification = "C6110B (Software engineering techniques); C6140D
(High level languages)",
keywords = "Abstract representations; Occam 2; Parallel
Communicating Sequential Code; PCSC methodology;
Programming languages; PVM C; Software component reuse;
Software reuse",
thesaurus = "Occam; Software reusability",
}
@InProceedings{Lonsdale:1994:CMH,
author = "G. Lonsdale and J. Clinckemaillie and S. Vlachoutsis
and J. Dubois",
title = "Crash-simulation migration to {HPC} systems",
crossref = "Dekker:1994:MPP",
pages = "439--446",
year = "1994",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "ESI GmbH, Eschborn, Germany",
classification = "C4185 (Finite element analysis); C6110P (Parallel
programming); C7440 (Civil and mechanical engineering
computing); C7480 (Production engineering computing)",
keywords = "Algorithmic parallelization; Automatic
parallelization; Automobile crashworthiness simulation
program; Automobile testing; Body shell deformation
calculation; CAD; CAMAS; Car crash simulation; Computer
Aided Migration of Applications System; Digital
simulation; Distributed-memory; FAM; Finite element
model; High performance computing; MIMD; PAM-CRASH;
PAM-STAMP; Parallel programming; Software portability",
thesaurus = "Accidents; Automobile industry; Automobiles; CAD/CAM;
Computer aided engineering; Digital simulation; Finite
element analysis; Mechanical engineering computing;
Parallel programming; Product development; Safety;
Software portability; Testing",
}
@InProceedings{Lonsdale:1994:CRP,
author = "G. Lonsdale and J. Clinckemaillie and S. Vlachoutsis
and J. Dubois",
title = "Communication requirements in parallel crashworthiness
simulation",
crossref = "Gentzsch:1994:HPC",
pages = "55--61",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "ESI GmbH, Eschborn, Germany",
classification = "C4185 (Finite element analysis); C5440
(Multiprocessing systems); C6150N (Distributed systems
software); C6185 (Simulation techniques); C7440 (Civil
and mechanical engineering computing)",
keywords = "Algorithmic features; Communication requirements;
Communications strategy design; Communications strategy
implementation; Distributed-memory MIMD machines;
Global communication; Message-passing; MPI standard;
Overhead minimisation; PAM-CRASH industrial
crashworthiness simulation program, PAM-CRASH; Parallel
crashworthiness simulation; Parallelization approach;
PARMACS; Portable message-passing interfaces; PVM",
thesaurus = "Application program interfaces; Digital simulation;
Distributed memory systems; Finite element analysis;
Message passing; Nonlinear dynamical systems; Parallel
processing; Structural engineering computing",
}
@InProceedings{Maffeis:1994:SSD,
author = "S. Maffeis",
title = "System support for distributed computing",
crossref = "Gentzsch:1994:HPC",
pages = "293--301",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Zurich Univ., Switzerland",
classification = "C6110J (Object-oriented programming); C6115
(Programming support); C6150N (Distributed systems
software)",
keywords = "Distributed computing; Distributed failure-resilient
applications; Distributed parallel computing; ELECTRA;
LINDA systems; Object-groups; Object-oriented
communication; Object-oriented programming; PVM;
Reliable multicast; Toolkit; Transputer system",
thesaurus = "Distributed processing; Object-oriented programming;
Software fault tolerance; Software tools",
}
@InProceedings{Malony:1994:PAP,
author = "A. Malony and B. Mohr and P. Beckman and D. Gannon and
S. Yang and F. Bodin",
title = "Performance analysis of {pC++}: a portable
data-parallel programming system for scalable parallel
computers",
crossref = "Siegal:1994:PEI",
pages = "75--84",
year = "1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. and Inf. Sci., Oregon Univ., Eugene,
OR, USA",
classification = "C5440 (Multiprocessor systems and techniques);
C6110P (Parallel programming); C6120 (File
organisation); C6140D (High level languages); C6150G
(Diagnostic, testing, debugging and evaluating
systems)",
keywords = "C++ language extension; Concurrent aggregate
collection classes; Distributed data structures; Embar;
Fast Poisson solver; Memory hierarchy; NAS suite;
Nearest neighbor grid computation; Parallel execution
semantics; Parallel machine; PC++; Performance
analysis; Performance tools; Portable data-parallel
programming system; Scalability measurements; Scalable
parallel computers; Sparse codes",
thesaurus = "C language; Data structures; Parallel languages;
Parallel machines; Parallel programming; Performance
evaluation; Program testing; Software portability",
}
@MastersThesis{Manchek:1994:DIP,
author = "Robert J. Manchek",
title = "Design and implementation of {PVM} version 3",
type = "M.S. thesis",
school = inst-UTK,
address = inst-UTK:adr,
pages = "viii + 81",
year = "1994",
bibdate = "Mon Jan 15 18:16:58 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
keywords = "Computer networks; Computer software.; Parallel
computers.",
}
@InProceedings{Marin:1994:GAL,
author = "F. J. Marin and O. Trelles-Salazar and F. Sandoval",
title = "Genetic Algorithms on {LAN-Message} Passing
Architectures Using {PVM}: Application to the Routing
Problem",
crossref = "Davidor:1994:PPS",
pages = "534--545",
xxnote = "Check pages: 534--545 or 534--543??",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. de Arquitectura y Tecnologia de Computadores y
Electronica, Malaga Univ., Spain",
classification = "B6150P (Communication network design and planning);
B6210L (Computer communications); C1180 (Optimisation
techniques); C4240C (Computational complexity); C4240P
(Parallel programming and algorithm theory); C5220P
(Parallel architecture); C5620L (Local area networks);
C6115 (Programming support); C6150N (Distributed
systems software); C7410F (Communications computing);
C7430 (Computer engineering)",
corpsource = "Dept. de Arquitectura y Tecnologia de Computadores y
Electronica, Malaga Univ., Spain",
keywords = "allocation; area networks; combinatorial;
Combinatorial complexity; communication; Communication
latency; complexity; computational complexity; computer
architectures; computing; data communication; data
communications; Data communications; data-passing load;
Data-passing load; dynamic load balancing; Dynamic load
balancing; fault; fault tolerant; Fault tolerant
capabilities; genetic algorithm parallelization;
Genetic algorithm parallelization; genetic algorithms;
independent functions; Independent functions;
information; Information redistribution; integrated
software; integration tool; LAN-based message passing;
LAN-based message passing computer architectures;
latency; local; master node; Master node; message
passing; optimization problem; Optimization problem;
Parallel; parallel algorithms; parallel architectures;
Parallel Virtual Machine; partial results reporting;
Partial results reporting; public domain software;
Public domain software; PVM 3.1; redistribution;
resource; routing problem; Routing problem; server
processors; Server processors; sockets; Sockets;
software; Software integration tool; software tools;
telecommunication computing; telecommunication network
routing; tolerant capabilities; Virtual Machine;
virtual machines",
pubcountry = "Germany",
thesaurus = "Computational complexity; Data communication; Fault
tolerant computing; Genetic algorithms; Integrated
software; Local area networks; Message passing;
Parallel algorithms; Parallel architectures; Public
domain software; Resource allocation; Software tools;
Telecommunication computing; Telecommunication network
routing; Virtual machines",
treatment = "P Practical",
}
@InProceedings{Mattson:1994:PEP,
author = "T. G. Mattson",
title = "Programming Environments for Parallel Computing: a
Comparison of {CPS}, {Linda}, {P4}, {PVM}, {POSYBL}, and
{TCGMSG}",
crossref = "Hesham:1994:PTS",
volume = "II",
pages = "586--594",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Intel Sci. Comput., Beaverton, OR, USA",
classification = "C6110P (Parallel programming); C6115 (Programming
support)",
corpsource = "Intel Sci. Comput., Beaverton, OR, USA",
keywords = "Communication times; communication times; Cooperative
Processes Software; CPS; Environment utility;
environment utility; environments; Ethernet network;
four-; Four-node communications tests; Linda; node
communications tests; P4; parallel programming;
performance evaluation; Portable parallel programming
environments; portable parallel programming
environments; POSYBL; programming; PVM;
Reproducibility; reproducibility; software portability;
SPARCstation 1; SPARCstation 1 workstations; TCGMSG;
Theoretical Chemistry Group Message-passing system;
Two-node communication benchmarks; two-node
communication benchmarks; workstations",
sponsororg = "IEEE; ACM; Univ. Hawaii; Univ. Hawaii Coll. Bus.
Admin",
thesaurus = "Parallel programming; Performance evaluation;
Programming environments; Software portability",
treatment = "P Practical; X Experimental",
}
@Article{Matyska:1994:DCS,
author = "Lud{\v{e}}k Matyska and Jaroslav Ko{\v{c}}a",
title = "{D-CICADA}: a software for conformational {PES}
elucidation on network of workstations",
journal = j-J-COMPUT-CHEM,
volume = "15",
number = "9",
pages = "937--946",
month = sep,
year = "1994",
CODEN = "JCCHDD",
DOI = "https://doi.org/10.1002/jcc.540150904",
ISSN = "0192-8651 (print), 1096-987X (electronic)",
ISSN-L = "0192-8651",
bibdate = "Thu Nov 29 14:54:27 MST 2012",
bibsource = "http://www.interscience.wiley.com/jpages/0192-8651;
http://www.math.utah.edu/pub/tex/bib/jcomputchem1990.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Inst. of Comput. Sci., Masaryk Univ., Brno, Czech
Republic",
classification = "A3115 (General mathematical and computational
developments for atoms and molecules); A3190 (Other
topics in the theory of atoms and molecules); A3520B
(General molecular conformation and symmetry;
stereochemistry); C6110P (Parallel programming); C7320
(Physics and chemistry computing)",
fjournal = "Journal of Computational Chemistry",
journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X",
keywords = "CICADA; Conformational potential energy hypersurface;
Cyclohexane; D-CICADA software; DEC workstations;
Distributed environment; Parallel virtual machine;
Parallelization; Polynomial time; Sun workstations;
Terminally blocked alanine; Virtual machines",
onlinedate = "7 Sep 2004",
thesaurus = "Organic compounds; Organic molecule configurations;
Parallel programming; Physics computing; Potential
energy curves and surfaces of molecules; Virtual
machines",
}
@InProceedings{McKenzie:1994:CIM,
author = "N. R. McKenzie and K. Bolding and C. Ebeling and L.
Snyder",
title = "{CRANIUM}: An Interface for Message Passing on
Adaptive Packet Routing Networks",
crossref = "Bolding:1994:PCR",
pages = "266--280",
year = "1994",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "B6150C (Communication switching); B6210L (Computer
communications); C4230M (Multiprocessor
interconnection); C5220P (Parallel architecture);
C5610N (Network interfaces); C5620 (Computer networks
and techniques)",
conftitle = "Parallel Computer Routing and Communication. First
International Workshop, PCRCW '94",
corpsource = "Dept. of Comput. Sci. and Eng., Washington Univ.,
Seattle, WA, USA",
keywords = "adaptive packet routing networks; arbitrary sequence;
automatic-receive interface; buffer addresses;
Cranium; interconnection network; message passing;
multiprocessor interconnection networks; network
interface; network interfaces; packet serialization;
packet switching; physical node identifiers;
processor-initiated interface; processor-network
interface; user-level programs",
treatment = "P Practical",
}
@Article{McKinney:1994:PGU,
author = "G. W. McKinney",
title = "A practical guide to using {MCNP} with {PVM}",
journal = j-TRANS-AM-NUCL-SOC,
volume = "71",
number = "????",
pages = "397--398",
month = "????",
year = "1994",
CODEN = "TANSAO",
ISSN = "0003-018X",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Los Alamos Nat. Lab., NM, USA",
classification = "A2880F (Radiation monitoring and radiation
protection); C6150N (Distributed systems software);
C7470 (Nuclear engineering computing)",
conflocation = "Washington, DC, USA; 13-17 Nov. 1994",
conftitle = "1994 Winter Meeting of American Nuclear Society
(papers in summary form only received)",
corpsource = "Los Alamos Nat. Lab., NM, USA",
fjournal = "Transactions of the American Nuclear Society",
keywords = "distributed memory systems; distributed-memory
multiprocessing; Distributed-memory multiprocessing;
engineering computing; MCNP; Monte Carlo methods;
nuclear; PVM; radiation protection",
thesaurus = "Distributed memory systems; Monte Carlo methods;
Nuclear engineering computing; Radiation protection",
treatment = "P Practical",
}
@InProceedings{Miller:1994:PPP,
author = "B. P. Miller and J. K. Hollingsworth and M. D.
Callaghan",
title = "The {Paradyn} parallel performance tools and {PVM}",
crossref = "Dongarra:1994:PSW",
pages = "201--210",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6115 (Programming support); C6150G (Diagnostic,
testing, debugging and evaluating systems); C6150N
(Distributed systems software)",
corpsource = "Wisconsin Univ., Madison, WI, USA",
keywords = "applications; automated bottleneck searching; dynamic;
heterogeneous program measurement; instrumentation;
large-scale parallel applications; long-; machines;
native PVM; Paradyn; parallel performance tools;
parallel programming; performance problem causes;
production-sized data sets; program diagnostics;
running applications; software metrics; software
performance evaluation; software tools; Sun; Thinking
Machine CM-5; virtual; workstations",
treatment = "P Practical",
}
@InProceedings{Miller:1994:PPT,
author = "B. P. Miller and J. K. Hollingsworth and M. D.
Callaghan",
title = "The {Paradyn} Performance Tools and {PVM}",
crossref = "Dongarra:1994:PSW",
pages = "201--210",
year = "1994",
bibdate = "Wed Aug 14 09:02:28 MDT 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@MastersThesis{Nemer-Preece:1994:LBH,
author = "Nicole Anne Nemer-Preece",
title = "Load balancing the heat equation in a heterogeneous
environment with {PVM}",
type = "M.S. thesis",
school = "University of Missouri, Rolla",
address = "Rolla, MO, USA",
pages = "viii + 52",
year = "1994",
bibdate = "Mon Jan 15 18:17:04 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Neun:1994:UPB,
author = "W. Neun",
title = "Using {PVM} based software for parallel computation in
Computer Algebra",
crossref = "Calmet:1994:RWC",
pages = "46--51",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Konrad-Zuse-Zentrum fur Informationstech. Berlin,
Germany",
classification = "C4240P (Parallel programming and algorithm theory);
C7310 (Mathematics computing)",
corpsource = "Konrad-Zuse-Zentrum fur Informationstech. Berlin,
Germany",
keywords = "computer algebra; Computer algebra; manipulation;
mathematics computing; network topology; Network
topology; parallel algorithms; Parallel algorithms;
parallel computation; Parallel computation; parallel
machines; Parallel machines; performance benefit;
Performance benefit; PVM based software; symbol",
pubcountry = "Germany",
sponsororg = "Univ. Karlsruhe",
thesaurus = "Mathematics computing; Parallel algorithms; Symbol
manipulation",
treatment = "P Practical",
}
@InProceedings{Nguyen:1994:DCE,
author = "S. T. Nguyen and B. J. Zook and Xiaodong Zhang",
title = "Distributed computation of electromagnetic scattering
problems using finite-difference time-domain
decompositions",
crossref = "IEEE:1994:PTI",
pages = "85--93",
year = "1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Instrum. and Space Res., Southwest Res.
Inst., San Antonio, TX, USA",
classification = "A0260 (Numerical approximation and analysis); A4110H
(Electromagnetic waves: theory); B0290P (Differential
equations); B5210 (Electromagnetic wave propagation);
C4170 (Differential equations); C4240P (Parallel
programming and algorithm theory); C7320 (Physics and
chemistry computing)",
keywords = "Communication pattern variations; Computing
performance; Distributed computation; Distributed
memory; Distributed workstation network;
Electromagnetic scattering problems; Finite-difference
time-domain decompositions; Load balancing; Numerical
method; Parallelism; Partial differential equations;
PVM; Scalability",
thesaurus = "Distributed algorithms; Distributed memory systems;
Electromagnetic wave scattering; Finite difference
time-domain analysis; Partial differential equations;
Physics computing",
}
@InProceedings{Nordling:1994:SOD,
author = "P. Nordling and P. Fritzson",
title = "Solving ordinary differential equations on parallel
computers --- applied to dynamic rolling bearings
simulation",
crossref = "Dongarra:1994:PSC",
pages = "397--415",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. and Inf. Sci., Linkoping Univ.,
Sweden",
classification = "C4170 (Differential equations); C6150N (Distributed
systems software); C6185 (Simulation techniques); C7440
(Civil and mechanical engineering computing)",
keywords = "Dynamic rolling bearings simulation; Ethernet;
Fine-grained synchronization; Initial value problems;
LSODA; MIMD parallel computers; Ordinary differential
equation solution; Parallelism; PARIX operating system;
Parsytec GigaCube; PVM; Solaris 2.3; SPARC 10
workstation cluster; Speedup; Sun SPARCcenter 2000",
thesaurus = "Differential equations; Digital simulation; Initial
value problems; Machine bearings; Mechanical
engineering computing; Parallel processing",
}
@InProceedings{Otto:1994:PVM,
author = "S. W. Otto",
title = "Processor Virtualization and Migration for {PVM}",
crossref = "Dongarra:1994:PSW",
pages = "66--75",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5440 (Multiprocessing systems); C6150N (Distributed
systems software); C7430 (Computer engineering)",
corpsource = "Dept. of Comput. Sci. and Eng., Oregon Graduate Inst.
of Sci. and Technol., Portland, OR, USA",
keywords = "context switch; distributed memory systems;
distributed scheduling systems; interoperability; local
communication speeds; Machine; machines; message
passing; Migratable PVM; multi; parallel; Parallel
Virtual; performance figures; process granularity;
process level MPVM; processor virtualization;
programming model; run realistic applications; semantic
restrictions; threaded version; times; transparent
migration; transparent migration mechanisms; virtual
machines; work migration",
treatment = "P Practical",
}
@Article{Phan-Thien:1994:CDL,
author = "N. Phan-Thien and D. Tullock",
title = "Completed double layer boundary element method in
elasticity and {Stokes} flow: Distributed computing
through {PVM}",
journal = j-COMP-MECH,
volume = "14",
number = "4",
pages = "370--383",
month = jul,
year = "1994",
CODEN = "CMMEEE",
ISSN = "0178-7675",
bibdate = "Sat Apr 06 15:05:19 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Pierce:1994:NMP,
author = "P. Pierce",
title = "The {NX} message passing interface",
journal = j-PARALLEL-COMPUTING,
volume = "20",
number = "4",
pages = "463--480",
month = apr,
year = "1994",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5440 (Multiprocessor systems and techniques);
C6110P (Parallel programming); C6115 (Programming
support); C6150N (Distributed systems)",
corpsource = "Intel Corp., Beaverton, OR, USA",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
keywords = "communication model; design tradeoffs; distributed
memory systems; high performance; high performance
interface; Intel multicomputers; massively parallel
distributed memory supercomputers; message passing;
multicomputer message passing; NX interface; NX message
passing interface; parallel applications; parallel
programming; performance; programming environments;
typed send/receive model; usability; vendor-supplied
programming interface",
pubcountry = "Netherlands",
treatment = "P Practical",
}
@InProceedings{Pierce:1994:PIN,
author = "P. Pierce and G. Regnier",
title = "The {Paragon} implementation of the {NX} message
passing interface",
crossref = "Pierce:1994:PSH",
pages = "184--190",
year = "1994",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5220P (Parallel architecture); C5440
(Multiprocessor systems and techniques); C6115
(Programming support); C6150J (Operating systems);
C6150N (Distributed systems)",
conftitle = "Proceedings of IEEE Scalable High Performance
Computing Conference",
corpsource = "Intel Supercomput. Syst. Div., Beaverton, OR, USA",
keywords = "hardware; high performance message passing; message
passing; message passing design; message passing
interface; NX; operating system; operating systems
(computers); OSF/1; Paragon; parallel architectures;
parallel machines; parallel supercomputer; performance;
programming environment; programming environments",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Supercomput.
Appl",
treatment = "P Practical; T Theoretical or Mathematical",
}
@InProceedings{Pozo:1994:FTE,
author = "R. Pozo and K. Remington",
title = "Fast three-dimensional elliptic solvers on distributed
network clusters",
crossref = "Joubert:1994:PCT",
pages = "201--208",
year = "1994",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN,
USA",
classification = "C4130 (Interpolation and function approximation);
C4170 (Differential equations); C4240P (Parallel
programming and algorithm theory)",
keywords = "Distributed network clusters; Elliptic solvers;
Object-oriented message passing interface; Parallel
architecture; Parallel architectures; Spline
collocation",
thesaurus = "Distributed algorithms; Elliptic equations; Splines
[mathematics]",
}
@InProceedings{Puthukattukaran:1994:DIP,
author = "J. Puthukattukaran and S. Chalasani and P. Senapathy",
title = "Design and implementation of parallel algorithms for
gene-finding",
crossref = "IEEE:1994:PTI",
pages = "186--193",
year = "1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Electr. and Comput. Eng., Wisconsin Univ.,
Madison, WI, USA",
classification = "C4240P (Parallel programming and algorithm theory);
C5440 (Multiprocessing systems); C7330 (Biology and
medical computing)",
keywords = "CM-5 multicomputer; DNA sequences; Gene-finding; HP
Apollo workstations; Human Genome project; Parallel
algorithm; Parallel algorithm design; Parallel
gene-finding algorithm; Parallel Virtual Machine; PVM;
Serial algorithm; Software package",
thesaurus = "Biology computing; Cellular biophysics; DNA; Parallel
algorithms; Parallel machines",
}
@Article{Reale:1994:PCU,
author = "F. Reale and F. Bocchino and S. Sciortino",
title = "Parallel computing on {Unix} workstation arrays",
journal = j-COMP-PHYS-COMM,
volume = "83",
number = "2--3",
pages = "130--140",
month = dec,
year = "1994",
CODEN = "CPHCBZ",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Istituto e Osservatorio Astron., Palermo, Italy",
classification = "A9575P (Mathematical and computer techniques in
astronomy); C5620L (Local area networks); C6110P
(Parallel programming); C6115 (Programming support);
C6150J (Operating systems); C6150N (Distributed systems
software); C7350 (Astronomy and astrophysics
computing)",
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
keywords = "2D hydrodynamic code; Alpha processors; Astrophysical
flows; Data-domain decomposition; DECstations 3000/400;
DECstations 5000/200; Dedicated MIMD parallel system;
Ethernet LAN; FDDI LAN; Intel i860 processors; Massive
parallel computations; Meiko Computing Surface; MIMD
systems; Network bandwidth; Nondedicated parallel
systems; Parallel computing; Parallelization library;
Processor power; PVM software toolset; Software; Unix
workstation arrays",
pubcountry = "Netherlands",
thesaurus = "Astronomy computing; Astrophysical fluid dynamics;
Local area networks; Message passing; Parallel
programming; Protocols; Software packages; Unix;
Workstations",
}
@InProceedings{Rolfe:1994:PAP,
  author          = {T. J. Rolfe},
  title           = {{PVM}: An Affordable Parallel Processing Environment},
  crossref        = {Anonymous:1994:SCC},
  pages           = {118--125},
  year            = {1994},
  bibdate         = {Thu Feb 29 17:59:11 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Saarinen:1994:EES,
  author          = {S. Saarinen},
  title           = {{EASYPVM} --- An Enhanced Subroutine Library for
                     {PVM}},
  crossref        = {Gentzsch:1994:HPC},
  volume          = {2},
  pages           = {267--272},
  year            = {1994},
  bibdate         = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Center for Sci. Comput., Espoo, Finland},
  classification  = {C5440 (Multiprocessing systems); C6110B (Software
                     engineering techniques); C6110P (Parallel programming);
                     C6150N (Distributed systems software); C7430 (Computer
                     engineering)},
  corpsource      = {Center for Sci. Comput., Espoo, Finland},
  keywords        = {approach; clear message passing programming; Clear
                     message passing programming approach; EASYPVM; Enhanced
                     subroutine library; enhanced subroutine library; global
                     communication; Global communication routines; library;
                     message passing; Message passing calls; message passing
                     calls; Message passing library; parallel machines;
                     parallel programming; Parallel virtual machine;
                     parallel virtual machine; PICL/ParaGraph message
                     tracing postprocessor; Process creation; process
                     creation; PVM; PVM message passing; PVM message passing
                     syntax; routines; software libraries; syntax; virtual
                     machines},
  pubcountry      = {Germany},
  thesaurus       = {Message passing; Parallel machines; Parallel
                     programming; Software libraries; Virtual machines},
  treatment       = {P Practical},
}
@InProceedings{Scales:1994:DES,
  author          = {D. J. Scales and M. S. Lam},
  title           = {The design and evaluation of a shared object system
                     for distributed memory machines},
  crossref        = {USENIX:1994:PFU},
  pages           = {101--114},
  year            = {1994},
  bibdate         = {Sun Dec 22 10:20:45 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Comput. Syst. Lab., Stanford Univ., CA, USA},
  classification  = {C5440 (Multiprocessing systems); C6120 (File
                     organisation); C6150N (Distributed systems software)},
  keywords        = {Automatic caching; CM-5; Data access; Data prefetch;
                     Distributed memory machines; Global name space; High
                     communication overheads; IBM SP1; Intel iPSC/860;
                     Paragon; Parallel algorithm; Performance; Portable
                     run-time system; Remote processors; SAM; Scientific
                     algorithms; Shared data; Shared object system;
                     Synchronization; System design; System evaluation;
                     Workstations},
  thesaurus       = {Cache storage; Distributed memory systems; Operating
                     systems [computers]; Parallel algorithms; Parallel
                     machines; Shared memory systems; Synchronisation},
}
@Article{Schmidt:1994:EAO,
  author          = {B. K. Schmidt and V. S. Sunderam},
  title           = {Empirical analysis of overheads in cluster
                     environments},
  journal         = j-CPE,
  volume          = {6},
  number          = {1},
  pages           = {1--32},
  month           = feb,
  year            = {1994},
  CODEN           = {CPEXEI},
  ISSN            = {1040-3108},
  ISSN-L          = {1040-3108},
  bibdate         = {Sun Dec 22 10:18:08 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
                     GA, USA},
  classification  = {C4230M (Multiprocessor interconnection); C5220P
                     (Parallel architecture); C5470 (Performance evaluation
                     and testing)},
  fjournal        = {Concurrency, practice and experience},
  keywords        = {Cluster environments; Communication delay; Concurrent
                     computing; Heterogeneous processing elements; Load
                     imbalance; Parallelism model; Partitioning strategies;
                     Performance; PVM network computing system; Throughput},
  pubcountry      = {UK},
  thesaurus       = {Multiprocessing systems; Multiprocessor
                     interconnection networks; Performance evaluation},
}
@InProceedings{Schmidt:1994:IAP,
  author          = {M. Schmidt and R. Hanisch},
  title           = {Implementation of an air pollution transport model on
                     parallel hardware},
  crossref        = {Dekker:1994:MPP},
  pages           = {277--284},
  year            = {1994},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {GMD-FIRST, Berlin, Germany},
  classification  = {C5440 (Multiprocessing systems); C6150N (Distributed
                     systems software); C6185 (Simulation techniques); C7320
                     (Physics and chemistry computing)},
  keywords        = {Air pollution analysis; Air pollution transport model;
                     Berlin; Complex numerical models; Conurbations; MANNA
                     computer; Operational management; Parallel computer;
                     Parallel hardware; Programming interface; PVM; Run time
                     measurements; Simulation environment; Simulation
                     system; Smog situations; Urban planning},
  thesaurus       = {Air pollution; Digital simulation; Flow simulation;
                     Message passing; Parallel machines; Parallel
                     programming; Physics computing; Town and country
                     planning; Transport processes},
}
@TechReport{Schneenman:1994:DSS,
  author          = {Richard D. Schneenman},
  title           = {Distributed supercomputing software: experiences with
                     the parallel virtual machine --- {PVM}},
  number          = {NISTIR 5381},
  institution     = {U.S. Dept. of Commerce, National Institute of
                     Standards and Technology},
  address         = {Gaithersburg, MD, USA},
  pages           = {vi + 18},
  month           = mar,
  year            = {1994},
  bibdate         = {Mon Jan 15 15:32:53 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  annote          = {March 1994.},
}
@Article{Schoinas:1994:FGA,
  author          = {Ioannis Schoinas and Babak Falsafi and Alvin R. Lebeck
                     and Steven K. Reinhardt and James R. Larus and David A.
                     Wood},
  title           = {Fine-grain access control for distributed shared
                     memory},
  journal         = j-SIGPLAN,
  volume          = {29},
  number          = {11},
  pages           = {297--306},
  month           = nov,
  year            = {1994},
  CODEN           = {SINODQ},
  ISSN            = {0362-1340 (print), 1523-2867 (print), 1558-1160
                     (electronic)},
  ISSN-L          = {0362-1340},
  bibdate         = {Sat May 1 15:50:17 MDT 1999},
  bibsource       = {http://www.acm.org/pubs/toc/;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.acm.org:80/pubs/citations/proceedings/asplos/195473/p297-schoinas/},
  abstract        = {This paper discusses implementations of fine-grain
                     memory access control, which selectively restricts
                     reads and writes to cache-block-sized memory regions.
                     Fine-grain access control forms the basis of efficient
                     cache-coherent shared memory. This paper focuses on
                     low-cost implementations that require little or no
                     additional hardware. These techniques permit efficient
                     implementation of shared memory on a wide range of
                     parallel systems, thereby providing shared-memory codes
                     with a portability previously limited to message
                     passing. This paper categorizes techniques based on
                     where access control is enforced and where access
                     conflicts are handled. We incorporated three techniques
                     that require no additional hardware into Blizzard, a
                     system that supports distributed shared memory on the
                     CM-5. The first adds a software lookup before each
                     shared-memory reference by modifying the program's
                     executable. The second uses the memory's error
                     correcting code (ECC) as cache-block valid bits. The
                     third is a hybrid. The software technique ranged from
                     slightly faster to two times slower than the ECC
                     approach. Blizzard's performance is roughly comparable
                     to a hardware shared-memory machine. These results
                     argue that clusters of workstations or personal
                     computers with networks comparable to the CM-5's will
                     be able to support the same shared-memory interfaces as
                     supercomputers.},
  acknowledgement = ack-nhfb,
  classification  = {C5220P (Parallel architecture); C5320G
                     (Semiconductor storage); C5440 (Multiprocessing
                     systems); C6120 (File organisation)},
  conflocation    = {San Jose, CA, USA; 4-7 Oct. 1994},
  conftitle       = {Sixth International Conference on Architectural
                     Support for Programming Languages and Operating Systems
                     (ASPLOS-VI)},
  corpsource      = {Dept. of Comput. Sci., Wisconsin Univ., Madison, WI,
                     USA},
  fjournal        = {ACM SIG{\-}PLAN Notices},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  keywords        = {access conflicts; Blizzard; block-sized memory
                     regions; cache block valid bits; cache storage;
                     cache-block-sized memory regions; cache-coherent shared
                     memory; CM-5; design; distributed memory systems;
                     distributed shared memory; ECC approach; error
                     correcting code; fine-grain access control; low-cost
                     implementations; measurement; parallel machines;
                     parallel systems; performance; portability; security;
                     shared memory codes; shared memory interfaces; shared
                     memory reference; shared memory systems; software
                     lookup; storage management; supercomputers; theory},
  sponsororg      = {ACM; IEEE Comput. Soc},
  subject         = {{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
                     Styles, Shared memory. {\bf D.4.2} Software, OPERATING
                     SYSTEMS, Storage Management, Distributed memories. {\bf
                     D.4.6} Software, OPERATING SYSTEMS, Security and
                     Protection, Access controls.},
  treatment       = {P Practical},
}
@InProceedings{Seyfarth:1994:GEE,
  author          = {B. R. Seyfarth and J. L. Bickham and M. R. Fernandez},
  title           = {Glenda: an environment for easy parallel programming},
  crossref        = {Pierce:1994:PSH},
  pages           = {637--641},
  year            = {1994},
  bibdate         = {Sun Dec 22 10:18:08 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Comput. Sci., Southern Mississippi Univ.,
                     Hattiesburg, MS, USA},
  classification  = {C6110P (Parallel programming); C6115 (Programming
                     support); C7320 (Physics and Chemistry); C7430
                     (Computer engineering)},
  keywords        = {Benchmark; C programming language; Communication
                     functions; Coordination language; Glenda; Global tuple
                     space; Parallel programming environment; Parallel
                     Virtual Machine; Preprocessor; PVM message passing
                     functions; Software package; Tuple server process;
                     Underwater acoustic modeling},
  thesaurus       = {Acoustic analysis; File servers; Message passing;
                     Parallel programming; Physics computing; Programming
                     environments; Underwater sound; Virtual machines},
}
@InProceedings{Shee:1994:DMA,
  author          = {Jang Chung Shee and Chao Chin Wu and Lin Wen You and
                     Cheng Chen},
  title           = {Design of a multithread architecture and its parallel
                     simulation and evaluation environment},
  crossref        = {Anonymous:1994:ICS},
  volume          = {1},
  pages           = {69--76},
  year            = {1994},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Inst. of Comput. Sci. and Inf. Eng., Nat. Chiao Tung
                     Univ., Hsinchu, Taiwan},
  classification  = {C5220P (Parallel architecture); C6115 (Programming
                     support); C6185 (Simulation techniques)},
  keywords        = {Context switch; Integrated multiprocessing simulation
                     environment; Multithread architecture; Parallel
                     simulation; Parallel simulation and evaluation
                     environment; Parallel Virtual Machine; SUN SPARC
                     workstations; Thread-related instructions},
  thesaurus       = {Digital simulation; Parallel architectures;
                     Programming environments},
}
@InProceedings{Shelton:1994:FPS,
  author          = {W. A. Shelton and G. M. Stocks and F. J. Pinski and R.
                     G. Jordan and Y. Liu and L. Qui and J. B. Staunton and
                     D. D. Johnson and B. Ginatempo},
  title           = {First principles simulation of materials properties},
  crossref        = {Pierce:1994:PSH},
  pages           = {103--110},
  year            = {1994},
  bibdate         = {Sun Dec 22 10:18:08 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Oak Ridge Nat. Lab., TN, USA},
  classification  = {A3100 (Theory of atoms and molecules); C5440
                     (Multiprocessor systems and techniques); C6110P
                     (Parallel programming); C6185 (Simulation techniques);
                     C7320 (Physics and Chemistry)},
  keywords        = {Ag-Mg alloy system; Electronic origin; Electronic
                     structure; First principles simulation; High
                     performance workstations; Local computer environment;
                     Materials properties; Order-disorder temperature;
                     Ordered materials; Parallel computer code; Physics
                     computing; PVM3 3; Short-range order intensity;
                     Substitutionally disordered materials; Vector
                     supercomputers},
  thesaurus       = {Digital simulation; Fermi surface; Materials
                     properties; Molecular electronic states; Parallel
                     machines; Parallel programming; Physics; Physics
                     computing},
}
@InProceedings{Shing:1994:UPC,
  author          = {C.-C. Shing},
  title           = {Use {PVM} on computation of analysis of repeated
                     measurement designs},
  crossref        = {Sall:1994:CIS},
  pages           = {139--142},
  year            = {1994},
  bibdate         = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C1140Z (Other topics in statistics); C5440
                     (Multiprocessing systems); C6110P (Parallel
                     programming); C7310 (Mathematics computing)},
  corpsource      = {Dept. of Comput. Sci., Radford Univ., VA, USA},
  keywords        = {computation; concurrent; designed experiments;
                     distributed memory; distributed memory systems;
                     heterogeneous network; parallel computer; parallel
                     programming; parallelized sweep operator; PVM;
                     regression; repeated measurement designs; software
                     package; statistical analysis; sweep operation},
  sponsororg      = {Interface Found. North America},
  treatment       = {P Practical},
}
@Article{Skjellum:1994:DEZ,
  author          = {A. Skjellum and S. G. Smith and N. E. Doss and A. P.
                     Leung and M. Morari},
  title           = {The design and evolution of {Zipcode}},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {20},
  number          = {4},
  pages           = {565--596},
  day             = {31},
  month           = mar,
  year            = {1994},
  CODEN           = {PACOEJ},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Sun Dec 22 10:18:08 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Comput. Sci., Mississippi State Univ., MS,
                     USA},
  classification  = {C5440 (Multiprocessor systems and techniques);
                     C6110P (Parallel programming); C6120 (File
                     organisation); C6150N (Distributed systems)},
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
  keywords        = {Collective operations; Communication contexts;
                     Gather-send; Homogeneous computer networks; Large-scale
                     multicomputer software; Mailer data structure; Message
                     passing; MPI standard; Multicomputers; Point-to-point
                     communication; Process-management system;
                     Receive-scatter semantics; Runtime optimizations;
                     Static process groups; Virtual topologies; Zipcode},
  pubcountry      = {Netherlands},
  thesaurus       = {Data structures; Message passing; Multiprocessing
                     systems; Parallel programming},
}
@InProceedings{Skjellum:1994:WLM,
  author          = {A. Skjellum and N. E. Doss and P. V. Bangalore},
  title           = {Writing libraries in {MPI}},
  crossref        = {IEEE:1994:PSP},
  pages           = {166--173},
  year            = {1994},
  bibdate         = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Comput. Sci., Mississippi State Univ., MS,
                     USA},
  classification  = {C4140 (Linear algebra); C6110P (Parallel
                     programming); C6150N (Distributed systems software)},
  conftitle       = {Proceedings of Scalable Parallel Libraries
                     Conference},
  corpsource      = {Dept. of Comput. Sci., Mississippi State Univ., MS,
                     USA},
  keywords        = {cluster; Cluster; code fragments; Code fragments;
                     linear algebra; linear algebra library; Linear algebra
                     library; message passing; Message passing;
                     message-passing systems; Message-passing systems; MPI;
                     multicomputer; Multicomputer; parallel libraries;
                     Parallel libraries; parallel programming; standard;
                     Standard; subroutines; virtual topology; Virtual
                     topology},
  sponsororg      = {Mississippi State Univ.; Nat. Sci. Found},
  thesaurus       = {Linear algebra; Message passing; Parallel programming;
                     Subroutines},
  treatment       = {P Practical},
}
@InProceedings{Sloot:1994:CIO,
  author          = {P. M. A. Sloot and A. G. Hoekstra and L. O.
                     Hertzberger},
  title           = {A comparison of the {Iserver-Occam}, {Parix},
                     {Express}, and {PVM} programming environments on a
                     {Parsytec GCel}},
  crossref        = {Gentzsch:1994:HPC},
  volume          = {2},
  pages           = {253--259},
  year            = {1994},
  bibdate         = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C4240C (Computational complexity); C6110P (Parallel
                     programming); C6115 (Programming support)},
  corpsource      = {Dept. of Comput. Syst., Amsterdam Univ., Netherlands},
  keywords        = {communication capabilities; computational complexity;
                     development time; environments; Express; floating point
                     performance; global communication times; Iserver-Occam;
                     parallel programming; Parix; Parsytec GCel; point to
                     point communication; portability; programmability;
                     programming; PVM; software performance evaluation; time
                     complexity analysis; times},
  pubcountry      = {Germany},
  treatment       = {P Practical},
}
%%% NOTE: This entry describes the same paper as Sloot:1994:CIO (same
%%% authors, title, proceedings, and pages).  Both keys are retained so
%%% that existing citations of either key continue to resolve.
@InProceedings{Sloot:1994:CIP,
  author          = {P. M. A. Sloot and A. G. Hoekstra and L. O.
                     Hertzberger},
  title           = {A Comparison of the {Iserver-Occam}, {Parix},
                     {Express}, and {PVM} Programming Environments on a
                     {Parsytec GCel}},
  crossref        = {Gentzsch:1994:HPC},
  volume          = {2},
  pages           = {253--259},
  year            = {1994},
  bibdate         = {Sun Dec 22 10:20:45 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Comput. Syst., Amsterdam Univ., Netherlands},
  classification  = {C4240C (Computational complexity); C6110P (Parallel
                     programming); C6115 (Programming support)},
  keywords        = {Communication capabilities; Development time; Express;
                     Floating point performance; Global communication times;
                     Iserver-Occam; Parallel programming environments;
                     Parix; Parsytec GCel; Point to point communication
                     times; Portability; Programmability; PVM; Time
                     complexity analysis},
  thesaurus       = {Computational complexity; Parallel programming;
                     Programming environments; Software performance
                     evaluation},
}
@InProceedings{Stephens:1994:PBT,
  author          = {R. Stephens},
  title           = {Parallel benchmarks on the {Transtech Paramid}
                     supercomputer},
  crossref        = {deGloria:1994:TAS},
  pages           = {136--146},
  year            = {1994},
  bibdate         = {Sun Dec 22 10:20:45 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C5440 (Multiprocessing systems); C5470 (Performance
                     evaluation and testing)},
  keywords        = {Application fields; Distributed memory parallel
                     supercomputer; GENESIS; Intel i860-XP processors; NAS
                     suites; Parallel benchmarks; PARMACS codes; Portable
                     parallel codes; Portable PVM; Transtech Paramid
                     supercomputer; Workstation clusters},
  thesaurus       = {Distributed memory systems; Parallel processing;
                     Performance evaluation},
}
@Article{Still:1994:PPC,
  author          = {C. H. Still},
  title           = {Portable parallel computing via the {MPI1}
                     message-passing standard},
  journal         = j-COMPUT-PHYS,
  volume          = {8},
  number          = {5},
  pages           = {533--536, 538--539},
  month           = sep # "--" # oct,
  year            = {1994},
  CODEN           = {CPHYE2},
  ISSN            = {0894-1866 (print), 1558-4208 (electronic)},
  ISSN-L          = {0894-1866},
  bibdate         = {Sun Dec 22 10:18:08 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Lasers and Energy Comput. Div., Lawrence Livermore
                     Nat. Lab., CA, USA},
  classification  = {C6110P (Parallel programming); C6140D (High level
                     languages); C6150N (Distributed systems software)},
  fjournal        = {Computers in Physics},
  keywords        = {C languages binding; C++ bindings; Communicator;
                     Fortran binding; Functionality; Hardware;
                     Message-passing routine library; MPI1 message-passing
                     standard; Portable parallel computing; Receive routine;
                     Send routine; Vendor-independent message-passing
                     library},
  thesaurus       = {C language; FORTRAN; Message passing; Object-oriented
                     languages; Parallel programming},
}
@Article{Stone:1994:PSO,
  author          = {L. C. Stone and S. B. Shukla and B. Neta},
  title           = {Parallel satellite orbit prediction using a
                     workstation cluster},
  journal         = j-COMPUT-MATH-APPL,
  volume          = {28},
  number          = {8},
  pages           = {1--8},
  month           = oct,
  year            = {1994},
  CODEN           = {CMAPDK},
  ISSN            = {0898-1221 (print), 1873-7668 (electronic)},
  ISSN-L          = {0898-1221},
  bibdate         = {Sun Dec 22 10:18:08 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Naval Postgraduate Sch., Monterey, CA, USA},
  classification  = {A9385 (Instrumentation and techniques for
                     geophysical, hydrospheric and lower atmosphere
                     research); A9575P (Mathematical and computer
                     techniques); C5440 (Multiprocessor systems and
                     techniques); C7350 (Astronomy and astrophysics)},
  fjournal        = {Computers and Mathematics with Applications},
  keywords        = {Function decomposition techniques; Parallel computing;
                     Parallel satellite orbit prediction; Parallel Virtual
                     Machine; Performance metric; SUN workstations;
                     Workstation cluster},
  pubcountry      = {UK},
  thesaurus       = {Artificial satellites; Astronomy computing; Parallel
                     processing; Workstations},
}
@Article{Strok:1994:NJI,
  author          = {Dale C. Strok},
  title           = {In the News: {Jupiter} impacts: Resolution makes a big
                     difference. Supercomputer farming down under. {HPF
                     Forum} welcomes comments. {Smithsonian Awards} honor
                     computational scientists. Low-life computer viruses.
                     {PVM} developers get {R\&D-100} award. The eyes have
                     it. Neural nets detect breast cancer. Better cars
                     through cooperation. Parallel version of global climate
                     model. {Lockheed} to run {Idaho National Engineering
                     Lab}. Public-private partners: new drugs, new
                     software},
  journal         = j-IEEE-COMPUT-SCI-ENG,
  volume          = {1},
  number          = {3},
  pages           = {88--90},
  month           = {Fall},
  year            = {1994},
  CODEN           = {ISCEE4},
  ISSN            = {1070-9924 (print), 1558-190X (electronic)},
  ISSN-L          = {1070-9924},
  bibdate         = {Sat May 25 13:29:25 MDT 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {IEEE Computational Science \& Engineering},
  journal-URL     = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=99},
}
@InProceedings{Sunderam:1994:GPP,
  author          = {V. Sunderam},
  title           = {General Purpose Parallel Computing with {PVM}},
  crossref        = {Anonymous:1994:PPC},
  pages           = {185--198},
  year            = {1994},
  bibdate         = {Thu Feb 29 17:59:11 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Sunderam:1994:MSH,
  author          = {V. S. Sunderam},
  title           = {Methodologies and systems for heterogeneous concurrent
                     computing},
  crossref        = {Joubert:1994:PCT},
  pages           = {29--45},
  year            = {1994},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
                     GA, USA},
  classification  = {C4240P (Parallel programming and algorithm theory);
                     C6150N (Distributed systems software)},
  keywords        = {Heterogeneous concurrent computing; Parallel
                     algorithm; Parallel processing; Partitioning;
                     Performance aspects; PVM system; Scheduling},
  thesaurus       = {Parallel algorithms; Scheduling},
}
@Article{Sunderam:1994:PCC,
  author          = {V. S. Sunderam and G. A. Geist and J. Dongarra and R.
                     Manchek},
  title           = {The {PVM} concurrent computing system: Evolution,
                     experiences, and trends},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {20},
  number          = {4},
  pages           = {531--545},
  day             = {31},
  month           = mar,
  year            = {1994},
  CODEN           = {PACOEJ},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Fri Aug 6 10:14:00 MDT 1999},
  bibsource       = {http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1994&volume=20&issue=4;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1994&volume=20&issue=4&aid=861},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
                     GA, USA},
  classification  = {B6210L (Computer communications); C5620 (Computer
                     networks and techniques); C6110P (Parallel
                     programming); C6150N (Distributed systems)},
  corpsource      = {Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
                     GA, USA},
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
  keywords        = {auxiliary facilities; Auxiliary facilities; case
                     studies; Case studies; climate modeling; Climate
                     modeling; communication overheads; Communication
                     overheads; computer networks; computing model;
                     Computing model; environmental science; Environmental
                     science; experimental enhancements; Experimental
                     enhancements; heterogeneous; heterogeneous concurrent
                     computing; Heterogeneous concurrent computing;
                     Heterogeneous networked computing platforms; interface;
                     large scale scientific supercomputing; Large scale
                     scientific supercomputing; materials science; Materials
                     science; message passing; message passing model;
                     Message passing model; MPP; MPP support; network
                     computing; Network computing; networked computing
                     platforms; networked environments; Networked
                     environments; parallel processing; Parallel processing;
                     parallel programming; process groups; Process groups;
                     programming; Programming interface; PVM concurrent
                     computing system; software framework; Software
                     framework; support},
  pubcountry      = {Netherlands},
  thesaurus       = {Computer networks; Message passing; Parallel
                     programming},
  treatment       = {P Practical},
}
@InProceedings{Sydow:1994:PSA,
  author          = {A. Sydow},
  title           = {Parallel simulation of air pollution},
  crossref        = {Pehrson:1994:IPP},
  journal         = j-IFIP-TRANS-A,
  volume          = {A-52},
  pages           = {605--612},
  year            = {1994},
  CODEN           = {ITATEC},
  ISSN            = {0926-5473},
  bibdate         = {Sun Dec 22 10:18:08 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {GMD-Res. Inst. for Comput. Archit. and Software
                     Technol., Berlin, Germany},
  classification  = {A8670G (Atmosphere); A9260T (Air quality and air
                     pollution); A9365 (Data acquisition, processing and
                     storage); C6110P (Parallel programming); C7340
                     (Geophysics)},
  fjournal        = {IFIP Transactions. A. Computer Science and
                     Technology},
  keywords        = {Air pollutant transport models; Air pollution
                     modelling; Berlin-Brandenburg area, Germany; CM-5;
                     Equations; Eulerian models; FORGE; Lagrangian models;
                     MANNA; Meteorological models; Model domain
                     decomposition method; Model parallelization; Numerical
                     algorithms; Parallel hardware; Parallel simulation;
                     PARMACS; PVM; Runtime measurements; Software tools;
                     Transputer system; Workstation cluster},
  thesaurus       = {Air pollution; Digital simulation; Environmental
                     science computing; Geophysics computing; Numerical
                     analysis; Parallel processing; Software tools},
}
@InProceedings{Thomas:1994:PSA,
  author          = {S. J. Thomas and J. C{\^o}t{\'e}},
  title           = {Parallel {Semi-Lagrangian} Advection using {PVM}},
  crossref        = {Dekker:1994:MPP},
  pages           = {801--808},
  year            = {1994},
  bibdate         = {Thu Feb 29 17:59:11 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Div. de Recherche en Prevision Numerique,
                     Environnement Canada, Dorval, Que., Canada},
  classification  = {A0260 (Numerical approximation and analysis); A0340G
                     (Fluid dynamics: general mathematical aspects); A4710
                     (General fluid dynamics theory, simulation and other
                     computational methods); C4160 (Numerical integration
                     and differentiation); C4240P (Parallel programming and
                     algorithm theory); C5440 (Multiprocessing systems);
                     C6150N (Distributed systems software); C7320 (Physics
                     and chemistry computing)},
  keywords        = {Computational fluid dynamics; Courant Friedrichs Lewy
                     condition; Distributed MIMD implementation; Eulerian
                     methods; Intel iPSC/860; Parallel algorithms; Parallel
                     performance; Parallel semi-Lagrangian advection;
                     Passive advection problem; Processor; PVM; Scalable
                     code; Sub-grid dimensions; Time steps},
  thesaurus       = {Distributed memory systems; Fluid dynamics;
                     Integration; Parallel algorithms; Physics computing;
                     Software performance evaluation; Transport processes},
}
@InProceedings{Thomsen:1994:RTS,
  author          = {P. G. Thomsen},
  title           = {Real time simulation in a cluster computing
                     environment},
  crossref        = {Dongarra:1994:PSC},
  pages           = {493--497},
  year            = {1994},
  bibdate         = {Sun Dec 22 10:20:45 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Inst. for Math. Modelling, Tech. Univ. Denmark,
                     Lyngby, Denmark},
  classification  = {C5620L (Local area networks); C6150J (Operating
                     systems); C6185 (Simulation techniques); C7460
                     (Aerospace engineering computing); C7810C
                     (Computer-aided instruction)},
  keywords        = {Airplane flying; Cluster computing environment;
                     Differential algebraic equations; FDDI-ring;
                     Mathematical problem; Personnel training; PVM; Real
                     time simulation; Real time update; Ship manoeuvering;
                     Simulator design; Systems variables; Workstation
                     cluster},
  thesaurus       = {Aerospace simulation; Aircraft; Computer based
                     training; Digital simulation; FDDI; Local area
                     networks; Operating systems [computers]; Personnel;
                     Real-time systems; Ships; Workstations},
}
@InProceedings{Trefftz:1994:DPE,
  author          = {C. Trefftz and C. C. Huang and P. K. McKinley and T.
                     Y. Li and Z. Zeng},
  title           = {Design and performance evaluation of a distributed
                     eigenvalue solver on a workstation cluster},
  crossref        = {IEEE:1994:IPN},
  pages           = {608--615},
  year            = {1994},
  bibdate         = {Sun Dec 22 10:18:08 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Comput. Sci., Michigan State Univ., East
                     Lansing, MI, USA},
  classification  = {C4140 (Linear algebra); C4240P (Parallel programming
                     and algorithm theory); C5470 (Performance evaluation
                     and testing); C6110P (Parallel programming)},
  keywords        = {Bisection algorithm; Distributed eigenvalue solver;
                     High-performance workstations; Interprocess
                     communication packages; Laguerre iteration; P4;
                     Parallel algorithm; Parallel scientific computing;
                     Parallel split-merge; Performance evaluation;
                     Performance study; PVM; Rank two splitting; Separation
                     property; Split-merge technique; Standard matrix types;
                     Symmetric tridiagonal matrices; Workstation cluster},
  thesaurus       = {Eigenvalues and eigenfunctions; Parallel algorithms;
                     Parallel programming; Performance evaluation;
                     Workstations},
}
@InProceedings{Trelles-Salazar:1994:MSS,
  author          = {O. Trelles-Salazar and E. L. Zapata and J.-M. Carazo},
  title           = {Mapping strategies for sequential sequence comparison
                     algorithms on {LAN-based} message passing
                     architectures},
  crossref        = {Gentzsch:1994:HPC},
  pages           = {197--202},
  year            = {1994},
  bibdate         = {Sun Dec 22 10:20:45 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Comput. Archit., Malaga Univ., Spain},
  classification  = {C5220P (Parallel architecture); C5440
                     (Multiprocessing systems); C5620L (Local area
                     networks); C5630 (Networking equipment); C6110B
                     (Software engineering techniques); C6110P (Parallel
                     programming); C6150N (Distributed systems software);
                     C6160Z (Other DBMS); C7330 (Biology and medical
                     computing)},
  keywords        = {Communication latency; Dynamic load balancing; Fault
                     tolerant capabilities; File server; Guided self
                     scheduling; LAN-based message passing architectures;
                     Mapping strategies; Overall data-passing load;
                     Public-domain PVM 3.1 system; Sequential sequence
                     comparison algorithms; Simple workstation clusters;
                     Socket to socket communications; Software-integration
                     tool},
  thesaurus       = {Biology computing; Computer architecture; File
                     servers; Local area networks; Message passing; Parallel
                     programming; Resource allocation; Scheduling;
                     Sequences; Software fault tolerance; Software
                     portability; Very large databases; Workstations},
}
@InProceedings{Uhl:1994:PCC,
  author          = {A. Uhl},
  title           = {Parallel Compact Coding of Satellite Images with
                     Wavelet Packets using {PVM}},
  crossref        = {Kumar:1994:PPI},
  pages           = {382--387},
  year            = {1994},
  bibdate         = {Thu Feb 29 17:59:11 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@article{Varadarajan:1994:FDT,
  author = {V. Varadarajan and R. Mittra},
  title = {Finite-difference time-domain ({FDTD}) analysis using
    distributed computing},
  journal = j-IEEE-MICROW-GUIDED-WAVE-LETT,
  volume = {4},
  number = {5},
  pages = {144--145},
  month = sep # "\slash " # oct,
  year = {1994},
  coden = {IMGLE3},
  doi = {https://doi.org/10.1109/75.289515},
  issn = {1051-8207 (print), 1558-2329 (electronic)},
  issn-l = {1051-8207},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Electromagnetic Commun. Lab., Illinois Univ.,
    Champaign, IL, USA},
  classification = {B0290Z (Other numerical methods); B5100 (Electric
    and magnetic fields); B5200 (Electromagnetic waves,
    antennas and propagation); C4190 (Other numerical
    methods); C7310 (Mathematics); C7410D (Electronic
    engineering)},
  fjournal = {IEEE Microwave and Guided Wave Letters},
  keywords = {Electromagnetics; FDTD calculations; Finite-difference
    time-domain analysis; Linear speedup; Parallel
    distributed computing; Parallel Virtual Machine; PVM
    3.2; Three-dimensional rectangular cavity},
  thesaurus = {Cavity resonators; Distributed processing;
    Electromagnetic field theory; Electronic engineering
    computing; Finite difference time-domain analysis;
    Mathematics computing; Parallel algorithms},
}
@inproceedings{Vaughan:1994:MPM,
  author = {P. L. Vaughan and A. Skjellum and D. S. Reese and
    Fei-Chen Cheng},
  title = {Migrating from {PVM} to {MPI}. {I}. The {Unify}
    system},
  crossref = {IEEE:1994:FSF},
  pages = {488--495},
  year = {1994},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {NSF Eng. Res. Center for Computational Field
    Simulation, Mississippi State Univ., MS, USA},
  classification = {C6110P (Parallel programming); C6150N (Distributed
    systems software)},
  corpsource = {NSF Eng. Res. Center for Computational Field
    Simulation, Mississippi State Univ., MS, USA},
  keywords = {evolution path; Evolution path; message passing;
    message Passing Interface; Message Passing Interface;
    Message passing system; MPI; parallel libraries;
    Parallel libraries; parallel programming; portability
    system; Portability system; PVM; software portability;
    standard notation; Standard notation; system; Unify
    system},
  sponsororg = {IEEE Comput. Soc. Tech. Committee on Comput. Archit.;
    NASA; Univ. Maryland Inst. Adv. Comput. Studies; George
    Mason Univ},
  thesaurus = {Message passing; Parallel programming; Software
    portability},
  treatment = {P Practical},
}
@inproceedings{vonHanxleden:1994:VDF,
  author = {R. von Hanxleden and K. Kennedy and J. Saltz},
  title = {Value-based distributions in {Fortran D}},
  crossref = {Gentzsch:1994:HPC},
  pages = {434--440},
  year = {1994},
  bibdate = {Sun Dec 22 10:20:45 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Comput. Sci., Rice Univ., Houston, TX, USA},
  classification = {C6110P (Parallel programming); C6120 (File
    organisation); C6140D (High level languages); C6150C
    (Compilers, interpreters and other processors)},
  keywords = {Access locality; Access patterns; Data-parallel
    language; Fortran D; Index-based distributions;
    Inter-processor locality; Intra-processor locality;
    Irregular applications; Scalability; Sequential data
    structures; Value-based distributions},
  thesaurus = {Data structures; FORTRAN; Parallel languages;
    Parallelising compilers},
}
@Article{Walker:1994:DSM,
author = "David W. Walker",
title = "The design of a standard message passing interface for
distributed memory concurrent computers",
journal = j-PARALLEL-COMPUTING,
volume = "20",
number = "4",
pages = "657--673",
day = "31",
month = mar,
year = "1994",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1994&volume=20&issue=4;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "See erratum \cite{Walker:1994:EDS}.",
URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1994&volume=20&issue=4&aid=865;
http://www.epm.ornl.gov/~walker/mpi/papers/parcomp94.ps.Z",
acknowledgement = ack-nhfb,
affiliation = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
classification = "C5220P (Parallel architecture); C5440
(Multiprocessor systems and techniques); C5610N
(Network interfaces)",
corpsource = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
keywords = "application topologies; Application topologies;
collective communication; communication contexts;
Communication contexts; communication routines;
Communication routines; distributed memory concurrent
computers; Distributed memory concurrent computers;
distributed memory systems; message passing; MIMD; MPI;
network interfaces; point-to-point communication;
process groups; Process groups; standard message
passing interface; Standard message passing interface;
standards",
pubcountry = "Netherlands",
thesaurus = "Distributed memory systems; Message passing; Network
interfaces; Standards",
treatment = "P Practical",
}
@article{Walker:1994:EDS,
  author = {David W. Walker},
  title = {Erratum to: {``The design of a standard message
    passing interface for distributed memory concurrent
    computers''}},
  journal = j-PARALLEL-COMPUTING,
  volume = {20},
  number = {8},
  pages = {1215--1215},
  month = aug,
  year = {1994},
  coden = {PACOEJ},
  issn = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l = {0167-8191},
  bibdate = {Sat Apr 06 15:06:32 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {See \cite{Walker:1994:DSM}.},
  fjournal = {Parallel Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/01678191},
  keywords = {application topologies; collective communication;
    communication contexts; distributed memory concurrent
    computers; message passing; point-to-point
    communication; process groups; standards},
}
@inproceedings{Wark:1994:PIR,
  author = {P. Wark and J. Holt},
  title = {{PVM} Implementation of a Repeated Matching Heuristic
    For Vehicle Routing},
  crossref = {Arnold:1994:PCT},
  pages = {207--216 (or 207--214??)},
  year = {1994},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Math. and Comput., Univ. of Southern
    Queensland, Toowoomba, Qld., Australia},
  classification = {C4240P (Parallel programming and algorithm theory);
    C6120 (File organisation); C7430 (Computer
    engineering)},
  corpsource = {Dept. of Math. and Comput., Univ. of Southern
    Queensland, Toowoomba, Qld., Australia},
  keywords = {Benchmark problems; benchmark problems; computational
    complexity; MIMD parallel computer; NP-hard problem;
    parallel algorithms; Parallel Virtual Machine; PVM
    implementation; Repeated matching heuristic; repeated
    matching heuristic; routing; Software package PVM;
    software package PVM; structure; structures; SUN
    workstations; tree; tree data; Tree structure; vehicle;
    Vehicle routing; virtual machines},
  pubcountry = {Netherlands},
  thesaurus = {Computational complexity; Parallel algorithms; Tree
    data structures; Virtual machines},
  treatment = {A Application; P Practical},
}
@article{Welch:1994:PVM,
  author = {L. R. Welch},
  title = {A Parallel Virtual Machine for Programs Composed of
    Abstract Data Types},
  journal = j-IEEE-TRANS-COMPUT,
  volume = {43},
  number = {11},
  pages = {1249--1261},
  month = nov,
  year = {1994},
  coden = {ITCOB4},
  doi = {https://doi.org/10.1109/12.324558},
  issn = {0018-9340 (print), 1557-9956 (electronic)},
  issn-l = {0018-9340},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification = {C6110J (Object-oriented programming); C6110P
    (Parallel programming); C6120 (File organisation);
    C6150N (Distributed systems)},
  corpsource = {Dept. of Comput. and Inf. Sci., New Jersey Inst. of
    Technol., Newark, NJ, USA},
  fjournal = {IEEE Transactions on Computers},
  journal-url = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12},
  keywords = {abstract data types; ADTs; ARC; Asynchronous Remote
    Procedure Call; automatic parameter restoration; data
    structures; data synchronization; database management;
    dynamic load balancing; languages; machines;
    modularity; multiprocessing programs; parallel
    programming; parallel virtual machine; programming;
    remote procedure calls; reuse; software reusability;
    system development; systems; virtual},
  treatment = {P Practical},
}
@inproceedings{White:1994:VVC,
  author = {R. White},
  title = {{VCMON} --- the {VM\slash ESA Connectivity Monitor}},
  crossref = {Anonymous:1994:PSE},
  pages = {783--792},
  year = {1994},
  bibdate = {Sun Dec 22 10:20:45 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Velocity Software Inc., Boston, MA, USA},
  classification = {C6150G (Diagnostic, testing, debugging and
    evaluating systems); C6150J (Operating systems); C6150N
    (Distributed systems software)},
  keywords = {ADSM; AVS; Computer architecture; Connectivity
    product; LFS; PVM; RSCS; TCP/IP; VCMON; Virtual
    machine; VM; VM/ESA Connectivity Monitor; VTAM},
  thesaurus = {Client-server systems; Open systems; Operating systems
    [computers]; System monitoring; Virtual machines},
}
@PhdThesis{Wilhelms:1994:DAL,
author = "Gerhard Wilhelms",
title = "{Dynamische adaptive Lastverteilung f{\"u}r PVM
mittels unscharfer Benutzerprofile -- $ \mbox {PVM}^+ $
(English: Dynamic adaptive load distribution for PVM by
blurred user profiles -- $ \mbox {PVM}^+ $).}",
type = "Dissertation",
school = "Math.-Naturwiss. Fakult{\"a}t, Universit{\"a}t
Augsburg",
address = "Augsburg, Germany",
pages = "iv + 74",
year = "1994",
bibdate = "Sat Apr 06 15:01:28 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
}
@inproceedings{Yan:1994:PTA,
  author = {J. C. Yan},
  title = {Performance tuning with {AIMS} --- an {Automated
    Instrumentation and Monitoring System} for
    multicomputers},
  crossref = {Hesham:1994:PTS},
  pages = {625--633},
  year = {1994},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {RECOM Technol., NASA Ames Res. Center, Moffett Field,
    CA, USA},
  classification = {C5470 (Performance evaluation and testing); C6110P
    (Parallel programming); C6130B (Graphics techniques);
    C6150G (Diagnostic, testing, debugging and evaluating
    systems); C6150N (Distributed systems)},
  keywords = {AIMS; Automated Instrumentation and Monitoring System;
    C programs; Data collection overhead; Event recorders;
    FORTRAN programs; Multicomputers; Multiprocessors;
    Parallel program execution; Parallel programming
    paradigm; Performance data collection; Performance
    evaluation; Performance tuning; PVM; Resource
    allocation algorithms; Run-time performance-monitoring
    library; Scalable multiprocessor; Software toolkit;
    Source-code instrumentor; Trace post-processor;
    Trace-file analysis; Trace-file animation},
  thesaurus = {Computer animation; Computerised instrumentation;
    Computerised monitoring; Data acquisition;
    Multiprocessing systems; Parallel programming;
    Performance evaluation; Resource allocation; System
    monitoring; Tuning},
}
@Article{Yi:1994:PID,
author = "Sung Yi and K. H. Pierson and M. F. Ahmad",
title = "Parallel implementation of dynamic simulation to
filamentary composite structures with general rate
dependent damping",
journal = j-COMPUT-SYST-ENG,
volume = "5",
number = "4--6",
pages = "469--477",
month = aug # "--" # dec,
year = "1994",
CODEN = "COSEEO",
ISSN = "0956-0521",
ISSN-L = "0956-0521",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Nat. Center for Supercomput. Applications, Illinois
Univ., Urbana, IL, USA",
classification = "C4130 (Interpolation and function approximation);
C4185 (Finite element analysis); C6110P (Parallel
programming); C7440 (Civil and mechanical engineering
computing)",
fjournal = "Computing systems in engineering: an international
journal",
keywords = "CM-5; Conjugate gradient algorithm; Dynamic
simulation; Dynamic viscoelastic finite element
algorithm; Filamentary composite structures; Generic
message passing library; PVM master/slave visco-elastic
finite element program; Rate dependent damping;
Scalable distributed parallel environment",
thesaurus = "Conjugate gradient methods; Damping; Digital
simulation; Finite element analysis; Message passing;
Parallel programming; Structural engineering computing;
Viscoelasticity",
}
@inproceedings{Zdetsis:1994:PMD,
  author = {A. D. Zdetsis and R. Biswas},
  title = {A Parallel Molecular Dynamics Strategy For {PVM}},
  crossref = {Turchi:1994:SDA},
  pages = {713--718},
  year = {1994},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Zemla:1994:WTC,
author = "A. Zemla",
title = "Wavelet transforms computing on {PVM}",
crossref = "Dongarra:1994:PSC",
pages = "534--546",
year = "1994",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "Parallel computing methods are developed especially in
centers that have expensive multiprocessor computers.
The advantage of PVM (Parallel Virtual Machine) is that
it permits a network of heterogeneous Unix computers to
be used as a single large parallel computer. Thus large
computational problems can be solved by using the
aggregate power of many computers. We present some PVM
computational experiments of wavelet transforms in
image processing. Some PVM experiments were carried out
on an IBM PC 486 working under the LINUX system.",
acknowledgement = ack-nhfb,
affiliation = "Inst. of Math., Polish Acad. of Sci., Warsaw, Poland",
classification = "B0230 (Integral transforms); B0290Z (Other numerical
methods); B6140C (Optical information, image and video
signal processing); C1130 (Integral transforms); C1250
(Pattern recognition); C4190 (Other numerical methods);
C5260B (Computer vision and image processing
techniques); C6110P (Parallel programming); C6150J
(Operating systems); C6150N (Distributed systems
software)",
corpsource = "Inst. of Math., Polish Acad. of Sci., Warsaw, Poland",
keywords = "aggregate power; Aggregate power; IBM PC; image
processing; Image processing; large parallel computer;
LINUX system; machines; methods; microcomputer
applications; multiprocessor computers; Multiprocessor
computers; network; network of heterogeneous Unix
computers; Network of heterogeneous Unix computers;
operating systems; Parallel computing; Parallel
computing methods; parallel programming; Parallel
Virtual Machine; PVM; single; Single large parallel
computer; Unix; virtual; wavelet transforms; Wavelet
transforms computing",
pubcountry = "Germany",
sponsororg = "Danish Comput. Centre for Res. and Educ.; Inst. Math.
Modelling; Tech. Univ. Denmark",
thesaurus = "Image processing; Microcomputer applications; Network
operating systems; Parallel programming; Unix; Virtual
machines; Wavelet transforms",
treatment = "P Practical",
xxnote = "NB: special form AT{\&T} required to get correct
alpha-style labels.",
}
@inproceedings{Zielinski:1994:PPS,
  author = {K. Zielinski and M. Gajecki and G. Czajkowski},
  title = {Parallel programming systems for {LAN} distributed
    computing},
  crossref = {IEEE:1994:IPN},
  pages = {600--607},
  year = {1994},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Inst. of Comput. Sci., Univ. of Min. and Metall.,
    Cracow, Poland},
  classification = {B6210L (Computer communications); C5620L (Local area
    networks); C6110P (Parallel programming); C6140D (High
    level languages)},
  keywords = {ANSA; Communication tests; Distributed computing
    environments; Distributed programming systems
    construction; Experimental results; LAN distributed
    computing; Linda; P4; Parallel programming systems;
    Processor farm model efficiency; PVM; Run time
    efficiency; SR; Strand},
  thesaurus = {Local area networks; Parallel languages; Parallel
    programming; Software packages},
}
@InProceedings{Zu:1994:OSM,
author = "Hong Xu and Ya-Dong Gui and L. M. Ni",
title = "Optimal software multicast in wormhole-routed
multistage networks",
crossref = "IEEE:1994:PSW",
pages = "703--712",
year = "1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Inf. Sci. Inst., Univ. of Southern California, Marina
del Rey, CA, USA",
classification = "C4230M (Multiprocessor interconnection); C5220P
(Parallel architecture); C5440 (Multiprocessing
systems); C6110P (Parallel programming)",
keywords = "64-Node SP-1; Application level broadcast; Collective
communication; IBM SP-1; Interconnection architecture;
Meiko CS-2; Multistage interconnection networks;
Optimal multicast algorithm; Optimal software
multicast; Public domain MPI; Scalable parallel
computers; Switching technology; System level multicast
service; TMC CM-5; Wormhole routed multistage networks;
Wormhole-routed multistage networks",
thesaurus = "Multistage interconnection networks; Parallel
algorithms; Parallel machines",
xxnote = "The citation key keeps the earlier misspelling Zu of
the first author's surname (Xu) so that existing
citations still resolve.",
}
@InProceedings{Almeida:1995:CST,
author = "F. Almeida and F. Garcia and J. Roda and D. Morales
and C. Rodriguez",
title = "A comparative study of two distributed systems: {PVM}
and transputers",
crossref = "Cook:1995:TAS",
pages = "244--258",
year = "1995",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C1160 (Combinatorial mathematics); C1180
(Optimisation techniques); C4240P (Parallel programming
and algorithm theory); C5220P (Parallel architecture);
C5440 (Multiprocessing systems); C6110P (Parallel
programming); C6115 (Programming support)",
corpsource = "Univ. de La Laguna, Tenerife, Spain",
keywords = "algorithms; branch and bound; distributed system;
divide and conquer methods; divide and conquer parallel
heapsort algorithm; dynamic programming; environment;
Inmos language; integer knapsack problem; LAN; load
balancing; operations research; parallel; parallel
algorithm; parallel development environment; parallel
virtual machine; programming environments; PVM;
quicksort algorithm; software; sorting; sorting
problem; system; systems; transputer; transputer links;
travelling salesman problem; travelling salesman
problems",
pubcountry = "Netherlands",
sponsororg = "Transputer Consortium; World occam and Transputer User
Group; et al",
treatment = "P Practical",
}
@inproceedings{Aloisio:1995:UPW,
  author = {G. Aloisio and M. A. Bochicchio},
  title = {The use of {PVM} with workstation clusters for
    distributed {SAR} data processing},
  crossref = {Hertzberger:1995:HPM},
  pages = {570--581},
  year = {1995},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Fac. di Ingegneria, Lecce Univ., Italy},
  classification = {B5230 (Electromagnetic compatibility and
    interference); B6140C (Optical information, image and
    video signal processing); B6320 (Radar equipment,
    systems and applications); C1250 (Pattern recognition);
    C5260B (Computer vision and image processing
    techniques)},
  corpsource = {Fac. di Ingegneria, Lecce Univ., Italy},
  keywords = {active sensor; Active sensor; backscattered echo
    signals; Backscattered echo signals; cluster of
    workstations; Cluster of workstations; digital
    processing; Digital processing; distributed SAR data
    processing; Distributed SAR data processing; echo; high
    resolution ground; High resolution ground images; IBM
    RISC; IBM RISC System 6000/350; image focusing
    algorithm; Image focusing algorithm; image processing;
    images; PVM; radar; remote sensing; Remote sensing;
    synthetic aperture; System 6000/350; workstation
    clusters; Workstation clusters},
  pubcountry = {Germany},
  thesaurus = {Echo; Image processing; Remote sensing; Synthetic
    aperture radar},
  treatment = {A Application; P Practical},
}
@inproceedings{Alves:1995:WPC,
  author = {A. Alves and L. Silva and J. Carreira and J. G.
    Silva},
  title = {{WPVM}: parallel computing for the people},
  crossref = {Hertzberger:1995:HPM},
  pages = {582--587},
  year = {1995},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. de Engenharia Inf., Coimbra Univ., Portugal},
  classification = {C5440 (Multiprocessing systems); C6150J (Operating
    systems); C6180 (User interfaces); C7430 (Computer
    engineering)},
  keywords = {Microsoft Windows Operating System; MS Windows;
    Parallel machine; PC LANs; PVM implementation; Windows
    Parallel Virtual Machine},
  thesaurus = {Operating systems [computers]; Parallel machines; User
    interfaces; Virtual machines},
}
@InProceedings{Ancona:1995:PAD,
author = "M. Ancona and M. {De Benedetto}",
title = "A parallel algorithm for `document segmentation'",
crossref = "IEEE:1995:PEW",
pages = "516--521",
year = "1995",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dipartimento di Inf. e Scienza dell'Inf., Genoa Univ.,
Italy",
classification = "C4240P (Parallel programming and algorithm theory);
C5260B (Computer vision and image processing
techniques); C6130D (Document processing techniques)",
keywords = "Data parallel approach; Divide and conquer
implementation; Document segmentation; Parallel
algorithm; PVM3 system; Xy-tree; Xy-trees",
thesaurus = "Divide and conquer methods; Document image processing;
Image segmentation; Parallel algorithms; Tree data
structures",
}
@article{Anonymous:1995:BRPb,
  author = {Anonymous},
  title = {Book Review: {{\booktitle{PVM: Parallel virtual
    machine: a users' guide and tutorial for networked
    parallel computing}}: By Al Geist, Adam Beguelin, Jack
    Dongarra, Weicheng Jiang, Robert Manchek and Vaidy
    Sunderam. MIT Press, Cambridge, MA. (1994). 279 pages.
    \$19.95}},
  journal = j-COMPUT-MATH-APPL,
  volume = {30},
  number = {9},
  pages = {122--122},
  month = nov,
  year = {1995},
  coden = {CMAPDK},
  issn = {0898-1221 (print), 1873-7668 (electronic)},
  issn-l = {0898-1221},
  bibdate = {Wed Mar 1 21:48:22 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/0898122195901973},
  acknowledgement = ack-nhfb,
  fjournal = {Computers and Mathematics with Applications},
  journal-url = {http://www.sciencedirect.com/science/journal/08981221},
}
@article{Anonymous:1995:BRU,
  author = {Anonymous},
  title = {Book Review: {{\booktitle{Using MPI: Portable parallel
    programming with the message-passing interface}}: By
    William Gropp, Ewing Lusk and Anthony Skjellum. MIT
    Press, Cambridge, MA. (1994). 307 pages. \$24.95}},
  journal = j-COMPUT-MATH-APPL,
  volume = {30},
  number = {9},
  pages = {122--122},
  month = nov,
  year = {1995},
  coden = {CMAPDK},
  issn = {0898-1221 (print), 1873-7668 (electronic)},
  issn-l = {0898-1221},
  bibdate = {Wed Mar 1 21:48:22 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/089812219590199X},
  acknowledgement = ack-nhfb,
  fjournal = {Computers and Mathematics with Applications},
  journal-url = {http://www.sciencedirect.com/science/journal/08981221},
}
@inproceedings{Anonymous:1995:UPH,
  author = {Anonymous},
  title = {Using {PVM} to Host {CLIPS} in Distributed
    Environments},
  crossref = {Anonymous:1995:CCS},
  pages = {203--211},
  year = {1995},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Appiani:1995:PSI,
  author = {E. Appiani and M. Bologna and M. Corvi and M.
    Iardella},
  title = {{PVM} in a shared-memory industrial multiprocessor},
  crossref = {Hertzberger:1995:HPM},
  pages = {588--593},
  year = {1995},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Res. and Dev. Services, Elsag Bailey, Genova, Italy},
  classification = {C5220P (Parallel architecture); C5440
    (Multiprocessing systems); C7430 (Computer
    engineering)},
  keywords = {EMMA2E; ESPRIT project; Message-passing environment;
    Performance; Portable parallel applications; PVM;
    Shared-memory environment; Shared-memory industrial
    multiprocessor},
  thesaurus = {Parallel processing; Shared memory systems; Virtual
    machines},
}
@InProceedings{Appiani:1995:PSM,
author = "E. Appiani and M. Bologna and M. Corvi and M.
Iardella",
title = "{PVM} in a shared-memory industrial multiprocessor",
crossref = "Hertzberger:1995:HPM",
pages = "588--593",
year = "1995",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5220P (Parallel architecture); C5440
(Multiprocessing systems); C7430 (Computer
engineering)",
corpsource = "Res. and Dev. Services, Elsag Bailey, Genova, Italy",
keywords = "EMMA2E; ESPRIT project; message-passing environment;
parallel processing; performance; portable parallel
applications; PVM; shared memory systems;
shared-memory environment; shared-memory industrial
multiprocessor; virtual machines",
pubcountry = "Germany",
treatment = "A Application; P Practical",
xxnote = "Same paper as entry Appiani:1995:PSI; both citation
keys are retained so that existing citations still
resolve.",
}
@inproceedings{Arioli:1995:PSB,
  author = {M. Arioli and A. Drummond and I. S. Duff and D. Ruiz},
  title = {A parallel scheduler for block iterative solvers in
    heterogeneous computing environments},
  crossref = {Bailey:1995:PSS},
  pages = {460--465},
  year = {1995},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Istituto di Analisi Numerica, CNR, Pavia, Italy},
  classification = {B0290F (Interpolation and function approximation);
    C4130 (Interpolation and function approximation);
    C4240P (Parallel programming and algorithm theory);
    C5440 (Multiprocessing systems)},
  keywords = {Block iterative solvers; Cimmino method; Communication
    networks; Heterogeneous computing environments;
    Heterogeneous processors; Parallel scheduler; PVM 3},
  thesaurus = {Iterative methods; Parallel algorithms; Parallel
    machines; Scheduling; Telecommunication networks},
}
@inproceedings{Arnow:1995:DLB,
  author = {D. M. Arnow},
  title = {{DP}: a library for building portable, reliable
    distributed applications},
  crossref = {USENIX:1995:PUT},
  pages = {235--247},
  year = {1995},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Comput. and Inf. Sci., Brooklyn Coll., NY,
    USA},
  classification = {C6110P (Parallel programming); C6115 (Programming
    support)},
  keywords = {Asynchronous delivery; Communication tool; Disjunctive
    programming; Distributed processing; Distributed
    programming; DP; Failure tolerance; Integer goal
    programming code; Interrupt generating message;
    Library; Message operation; Message operations; Monte
    Carlo; Portable software building; Process creation;
    Process management; Reliable distributed application;
    Software package; Software portability; Software
    support},
  thesaurus = {Application generators; Authoring systems; Distributed
    processing; Software fault tolerance; Software
    libraries; Software packages; Software portability},
}
@InProceedings{Asenjo:1995:SLF,
author = "R. Asenjo and E. L. Zapata",
title = "Sparse {LU} factorization on the {Cray T3D}",
crossref = "Hertzberger:1995:HPM",
pages = "690--696",
year = "1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Archit., Malaga Univ., Spain",
classification = "C4140 (Linear algebra); C4240P (Parallel programming
and algorithm theory); C6110P (Parallel programming);
C6150N (Distributed systems software)",
keywords = "Compressed row storage; Cray T3D; Cyclic distribution;
Distributed memory machines; Dynamic data movement;
Fill-in; Local storage schemes; Overall efficiency;
Parallel algorithm; Processor mesh; PVM message passing
interface; Semi-ordered linked list; Sparse codes;
Sparse LU factorization; Sparse matrices; SPMD
programming model; Two-dimensional linked list",
thesaurus = "Cray computers; Distributed memory systems; Message
passing; Parallel algorithms; Parallel programming;
Sparse matrices",
}
@inproceedings{Ashby:1995:PPG,
  author = {S. F. Ashby and R. D. Falgout and S. G. Smith and A.
    F. B. Tompson},
  title = {The parallel performance of a groundwater flow code on
    the {Cray T3D}},
  crossref = {Bailey:1995:PSS},
  pages = {131--136},
  year = {1995},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Center for Comput. Sci. and Eng., Lawrence Livermore
    Nat. Lab., CA, USA},
  classification = {A0260 (Numerical approximation and analysis); A0270
    (Computational techniques); A4755M (Flow through porous
    media); A9240K (Groundwater); C4240P (Parallel
    programming and algorithm theory); C6110P (Parallel
    programming); C7340 (Geophysics computing)},
  keywords = {Computational kernels; Cray T3D; Distributed memory
    MIMD machines; Groundwater flow code; Parallel
    performance; PVM message-passing library;
    Three-dimensional heterogeneous porous media},
  thesaurus = {Flow through porous media; Geophysics computing;
    Groundwater; Message passing; Numerical analysis;
    Parallel programming},
}
@inproceedings{Ayguade:1995:DUA,
  author = {E. Ayguade and J. Garcia and M. Girones and J. Labarta
    and J. Torres and M. Valero},
  title = {Detecting and using affinity in an automatic data
    distribution tool},
  crossref = {Pingali:1995:LCP},
  pages = {61--75},
  year = {1995},
  bibdate = {Sun Dec 22 10:20:45 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. d'Arquitectura de Computadors, Univ. Politecnica
    de Catalunya, Barcelona, Spain},
  classification = {C6110P (Parallel programming); C6115 (Programming
    support); C6150N (Distributed systems software)},
  keywords = {Affinity; Alignment; Alignment functions; Alignment
    preferences; Arrays; Automatic data distribution tool;
    Data Distribution Tool; Fortran77; Loop reference
    patterns; Perfect Club benchmarks; Programs; Reference
    pattern analysis; SPEC benchmarks; Static functions;
    Tool phases},
  thesaurus = {Parallel programming; Software tools},
}
@inproceedings{Bakhtiari:1995:APL,
  author = {S. Bakhtiari and R. Safavi-Naini},
  title = {Application of {PVM} to linear cryptanalysis},
  crossref = {Gray:1995:PCT},
  pages = {278--279},
  year = {1995},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Wollongong Univ., NSW, Australia},
  classification = {C4240P (Parallel programming and algorithm theory);
    C6130S (Data security)},
  corpsource = {Wollongong Univ., NSW, Australia},
  keywords = {attack; Attack; block cipher algorithms; Block cipher
    algorithms; cryptography; Data; Data Encryption
    Standard; Encryption Standard; linear cryptanalysis;
    Linear cryptanalysis; parallel algorithms; PVM; virtual
    machines},
  pubcountry = {Netherlands},
  thesaurus = {Cryptography; Parallel algorithms; Virtual machines},
  treatment = {T Theoretical or Mathematical},
}
@InProceedings{Barbour:1995:PIG,
  author =       "A. E. Barbour and M. F. Gabre",
  title =        "Parallel Implementation of {Gauss--Seidel} and
                 Conjugate Gradient For Solving System of Linear
                 Equations {$ A x = b $} Using {PVM}",
  crossref =     "Aityan:1995:PFI",
  pages =        "33--36",
  year =         "1995",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Math. and Comput. Sci., Georgia Southern
                 Univ., Statesboro, GA, USA",
  classification = "B0290F (Interpolation and function approximation);
                 B0290H (Linear algebra); C4130 (Interpolation and
                 function approximation); C4140 (Linear algebra); C4240P
                 (Parallel programming and algorithm theory); C5440
                 (Multiprocessing systems)",
  corpsource =   "Dept. of Math. and Comput. Sci., Georgia Southern
                 Univ., Statesboro, GA, USA",
  keywords =     "algorithms; conjugate gradient methods; Conjugate
                 gradient methods; definite band matrix; equations;
                 Gauss--Seidel method; iterations; Iterations; linear;
                 Linear equations; matrix algebra; parallel; parallel
                 implementation; Parallel implementation; parallel
                 machines; positive; Positive definite band matrix; PVM;
                 solution vector; Solution vector; systematic behavior;
                 Systematic behavior",
  thesaurus =    "Conjugate gradient methods; Matrix algebra; Parallel
                 algorithms; Parallel machines",
  treatment =    "A Application; P Practical",
}
@Article{Beaumont:1995:DPG,
  author =       "P. M. Beaumont and P. T. Bradshaw",
  title =        "A distributed parallel genetic algorithm for solving
                 optimal growth models",
  journal =      j-COMP-ECONOMICS,
  volume =       "8",
  number =       "3",
  pages =        "159--179",
  month =        aug,
  year =         "1995",
  CODEN =        "CNOMEL",
  ISSN =         "0927-7099",
  ISSN-L =       "0927-7099",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Econ., Florida State Univ., Tallahassee, FL,
                 USA",
  classification = "C1180 (Optimisation techniques); C1290D (Systems
                 theory applications in economics and business); C4130
                 (Interpolation and function approximation); C4150
                 (Nonlinear and functional equations); C4180 (Integral
                 equations); C6150N (Distributed systems software);
                 C7120 (Financial computing); C7310 (Mathematics
                 computing)",
  fjournal =     "Computational Economics",
  keywords =     "Agent discounted utility maximization; Chebyshev
                 polynomial series expansion; Competing nodes;
                 Distributed parallel genetic algorithm; Economic
                 growth; Exact Euler equation; Finite horizon;
                 First-order conditions; Function topology;
                 Generalization; Infinite horizon; Multiple state
                 problems; Nonlinear integral equation; Optimal function
                 fitting; Parameter space searching; PVM; Single-state
                 deterministic optimal growth model; State variable
                 range; State-space searching; Taylor-Uhlig problem;
                 Workstation cluster",
  pubcountry =   "Netherlands",
  thesaurus =    "Chebyshev approximation; Distributed algorithms;
                 Economic cybernetics; Financial data processing;
                 Genetic algorithms; Integral equations; Mathematics
                 computing; Nonlinear equations; Polynomials;
                 State-space methods",
}
@Article{Beguelin:1995:REP,
  author =       "Adam Beguelin and Jack Dongarra and Al Geist and
                 Robert Manchek and Vaidy Sunderam",
  title =        "Recent Enhancements to {PVM}",
  journal =      j-IJSAHPC,
  volume =       "9",
  number =       "2",
  pages =        "108--127",
  month =        "Summer",
  year =         "1995",
  CODEN =        "IJSCFG",
  ISSN =         "1078-3482",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover
                 library database",
  abstract =     "This paper presents new features of PVM, a popular
                 standard for writing parallel programs that execute
                 over networks of heterogeneous machines. Although PVM
                 has become an important infrastructure for parallel
                 programmers, we continue to develop the system based
                 both on user feedback and our own research interests.
                 In this paper we present new communications routines
                 and briefly characterize their performance. We describe
                 new extensible services that allow advanced users to
                 customize certain aspects of the default PVM
                 functionality. An overview of shared-memory PVM
                 optimizations is presented. PVM's new tracing facility
                 and a graphical console that utilizes this capability
                 are described. Finally, we discuss future extensions to
                 PVM now under investigation.",
  acknowledgement = ack-nhfb,
  affiliation =  "Carnegie Mellon Univ",
  affiliationaddress = "Pittsburgh, PA, USA",
  classification = "722.1; 722.2; 722.4; 723.1; 723.1.1; 921.5; C5440
                 (Multiprocessing systems); C6110P (Parallel
                 programming); C6115 (Programming support); C6150N
                 (Distributed systems software)",
  corpsource =   "Sch. of Comput. Sci., Carnegie Mellon Univ.,
                 Pittsburgh, PA, USA",
  fjournal =     "International Journal of Supercomputer Applications
                 and High Performance Computing",
  journalabr =   "Int J Supercomput Appl High Perform Comput",
  keywords =     "Advanced users; advanced users; communications;
                 Communications routines; Computer architecture;
                 Computer networks; Computer programming languages;
                 Computer software; console; Data communication systems;
                 Data storage equipment; evaluation; Extensible
                 services; extensible services; graphical; Graphical
                 console; Heterogeneous machines; heterogeneous
                 machines; Message passing; Optimization; parallel
                 machines; Parallel processing systems; Parallel
                 programmers; parallel programmers; parallel
                 programming; Parallel programs; parallel programs;
                 Parallel virtual machine; Parallel Virtual Machine;
                 Performance; performance; PVM; routines; Shared memory;
                 shared memory; Shared-memory PVM optimizations;
                 shared-memory PVM optimizations; software libraries;
                 software performance; software standards; Standard;
                 standard; systems; Tracing facility; tracing facility;
                 User feedback; user feedback; virtual machines",
  thesaurus =    "Parallel machines; Parallel programming; Shared memory
                 systems; Software libraries; Software performance
                 evaluation; Software standards; Virtual machines",
  treatment =    "A Application; P Practical",
}
@InProceedings{Bendrider:1995:SME,
  author =       "M. Bendrider and J.-M. Leclercq",
  title =        "Second-Order {M{\o}ller--Plesset} and {Epstein--Nesbet}
                 Corrections to the Molecular Charge Density:
                 Distributed Computing on a Cluster of Heterogeneous
                 Workstations with the {PVM} System",
  crossref =     "Bernardi:1995:CCE",
  pages =        "73--??",
  year =         "1995",
  bibdate =      "Thu Feb 29 17:59:11 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@InProceedings{Benkner:1995:VFA,
  author =       "S. Benkner",
  title =        "{Vienna Fortran 90} --- an advanced data parallel
                 language",
  crossref =     "Malyshkin:1995:PCT",
  pages =        "142--156",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Inst. for Software Technol., Wien Univ., Austria",
  classification = "C6110P (Parallel programming); C6120 (File
                 organisation); C6140D (High level languages)",
  keywords =     "Advanced data parallel language; Data distribution;
                 Distributed memory parallel computers; Explicit user
                 control; Pointer objects; Shared memory programming
                 paradigm; User defined data structures; Vienna Fortran
                 90",
  thesaurus =    "FORTRAN; Message passing; Parallel languages; Shared
                 memory systems; Storage management",
}
@Article{Berendsen:1995:GMP,
  author =       "H. J. C. Berendsen and D. van der Spoel and R. van
                 Drunen",
  title =        "{GROMACS}: a message-passing parallel molecular
                 dynamics implementation",
  journal =      j-COMP-PHYS-COMM,
  volume =       "91",
  number =       "1--3",
  pages =        "43--56",
  month =        sep,
  year =         "1995",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Bioson Res. Inst., Groningen Univ., Netherlands",
  classification = "A3115 (General mathematical and computational
                 developments for atoms and molecules); A3420
                 (Interatomic and intermolecular potentials and forces);
                 A3425 (Intramolecular energy transfer; intramolecular
                 dynamics; dynamics of van der Waals molecules); A3520D
                 (Interatomic distances and angles in molecules); A6120J
                 (Computer simulation of static and dynamic liquid
                 behaviour); A8710 (General, theoretical, and
                 mathematical biophysics); A8715 (Molecular biophysics);
                 C6110P (Parallel programming); C7320 (Physics and
                 chemistry computing); C7330 (Biology and medical
                 computing)",
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
  keywords =     "Analysis tools; Aqueous environment;
                 Biomacromolecules; Buckingham potentials; Charge
                 groups; Conversion programs; Cosine power series
                 interactions; Coulomb potentials; Custom-designed
                 32-processor ring GROMACS; Dihedral angles; Energy
                 minimization program; Fixed bonded interactions;
                 GROMACS software; GROningen MAchine for Chemical
                 Simulation; Interprocessor communication; Lennard-Jones
                 potentials; Message-passing parallel molecular dynamics
                 implementation; Molecular dynamics program; Parallel
                 message-passing implementation; Parallel system;
                 Particle decomposition; Pressure scaling; Rectangular
                 periodic boundary conditions; Temperature scaling;
                 Variable nonbonded pair interactions",
  pubcountry =   "Netherlands",
  thesaurus =    "Biology computing; Bond angles; Chemistry computing;
                 Digital simulation; Electric potential; Lennard-Jones
                 potential; Molecular biophysics; Molecular dynamics
                 method; Parallel programming",
}
@Article{Bernaschi:1995:DRP,
  author =       "Massimo Bernaschi and Giorgio Richelli",
  title =        "Development and results of {PVMe} on the {IBM 9076
                 SP1}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "29",
  number =       "1",
  pages =        "75--83",
  day =          "15",
  month =        aug,
  year =         "1995",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1006/jpdc.1995.1107",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Thu Mar 9 09:18:58 MST 2000",
  bibsource =    "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.idealibrary.com/links/doi/10.1006/jpdc.1995.1107/production;
                 http://www.idealibrary.com/links/doi/10.1006/jpdc.1995.1107/production/pdf",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440
                 (Multiprocessing systems); C6115 (Programming
                 support)",
  corpsource =   "IBM Eur. Center for Sci. and Eng. Comput., Rome,
                 Italy",
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
  keywords =     "IBM 9076 SP1; IBM's AIX implementation; message
                 passing; parallel machines; PARMACS; passing
                 programming model; programming environments; PVM
                 message; PVMe",
  treatment =    "A Application; P Practical",
}
@InProceedings{Bernaschi:1995:PEI,
  author =       "M. Bernaschi and G. Richelli",
  title =        "{PVMe}: an enhanced implementation of {PVM} for the
                 {IBM 9076 SP2}",
  crossref =     "Hertzberger:1995:HPM",
  pages =        "461--471",
  year =         "1995",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "IBM Eur. Center for Sci. and Eng. Comput., Rome,
                 Italy",
  classification = "C6110P (Parallel programming); C6150N (Distributed
                 systems software)",
  corpsource =   "IBM Eur. Center for Sci. and Eng. Comput., Rome,
                 Italy",
  keywords =     "IBM 9076 SP2; IBM's AIX implementation; message;
                 message passing; Message passing programming model;
                 parallel programming; passing programming model; PVMe",
  pubcountry =   "Germany",
  thesaurus =    "Message passing; Parallel programming",
  treatment =    "P Practical",
}
@InProceedings{Bickham:1995:POM,
  author =       "J. L. Bickham",
  title =        "Parallel ocean modeling using {Glenda}",
  crossref =     "ACM:1995:PAS",
  pages =        "58--63",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Univ. of Southern Mississippi, Hattiesburg, MS, USA",
  classification = "C6110P (Parallel programming); C6150G (Diagnostic,
                 testing, debugging and evaluating systems); C7340
                 (Geophysics computing)",
  keywords =     "Array dependency; Debugging methods; Glenda; Ocean
                 dynamics; Parallel ocean modeling; Parallel version;
                 Parallelization process; PVM; SWEM",
  thesaurus =    "Geophysics computing; Oceanographic techniques;
                 Parallel programming; Program debugging",
}
@InProceedings{Bischof:1995:CSM,
  author =       "C. Bischof and S. Huss-Lederman and Xiaobai Sun and A.
                 Tsao and T. Turnbull",
  title =        "A Case Study of {MPI}: Portable and Efficient
                 Libraries",
  crossref =     "Bailey:1995:PSS",
  pages =        "728--733",
  year =         "1995",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
                 USA",
  classification = "C6110B (Software engineering techniques); C6110P
                 (Parallel programming); C6150N (Distributed systems
                 software)",
  conftitle =    "Proceedings of the Seventh SIAM Conference on Parallel
                 Processing for Scientific Computing",
  corpsource =   "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
                 USA",
  keywords =     "1 dimensional broadcast; 1 Dimensional broadcast;
                 ANL/MS MPI implementation; Argonne National
                 Laboratory/Mississippi State; broadcasting; case study;
                 Case study; CM5; Delta; efficient libraries; Efficient
                 libraries; Intel Delta; message passing; Message
                 Passing Interface standard; MPI; MPI based
                 implementations; MPI broadcast collective operation;
                 native NX message passing systems; Native NX message
                 passing systems; optimized versions; Optimized
                 versions; Paragon; parallel programming; parallel
                 programming system; Parallel programming system;
                 portable public domain version; Portable public domain
                 version; software libraries; software portability;
                 software standards; SP1",
  thesaurus =    "Broadcasting; Message passing; Parallel programming;
                 Software libraries; Software portability; Software
                 standards",
  treatment =    "P Practical",
}
@InProceedings{Bjorge:1995:ISS,
  author =       "D. Bjorge",
  title =        "Implementation of the semi-implicit scheme in a
                 message passing version of {HIRLAM} (weather
                 forecasting)",
  crossref =     "Hoffmann:1995:CAP",
  pages =        "75--90",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Norwegian Meteorol. Inst., Oslo, Norway",
  classification = "A9260X (Weather analysis and prediction); C4185
                 (Finite element analysis); C6110P (Parallel
                 programming); C7340 (Geophysics computing)",
  keywords =     "Algorithms; Atmosphere; Cray T3D SHMEM; DNMI; HIRLAM;
                 Intel NX; Message passing; Meteorology; MPP; Numerical
                 model; Parallel iterative Helmholtz solver; Parallel
                 programming; PVM; Semi-implicit scheme; Semiimplicit
                 scheme; Time integration scheme; Weather forecasting",
  thesaurus =    "Digital simulation; Finite element analysis; Iterative
                 methods; Message passing; Numerical analysis; Parallel
                 processing; Parallel programming; Weather forecasting",
}
@InProceedings{Blaszczyk:1995:PCE,
  author =       "A. Blaszczyk and Z. Andjelic and P. Levin and A.
                 Ustundag",
  title =        "Parallel computation of electric fields in a
                 heterogeneous workstation cluster",
  crossref =     "Hertzberger:1995:HPM",
  pages =        "606--611",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Corp. Res., Asea Brown Boveri AG, Heidelberg,
                 Germany",
  classification = "C5440 (Multiprocessing systems); C7310 (Mathematics
                 computing); C7320 (Physics and chemistry computing);
                 C7430 (Computer engineering)",
  keywords =     "3D boundary element code; Benchmark problems; Design
                 process; Dynamic load balancing; Electric fields;
                 Heterogeneous workstation cluster; Parallel
                 computation; PVM communication software",
  thesaurus =    "Electric fields; Mathematics computing; Parallel
                 processing; Physics computing; Virtual machines",
}
@InProceedings{Boianov:1995:DLC,
  author =       "L. Boianov and I. Jelly",
  title =        "Distributed logic circuit simulation on a network of
                 workstations",
  crossref =     "IEEE:1995:PEW",
  pages =        "304--310",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Lab. for Distributed Syst. and Comput. Networks, Acad.
                 of Sci., Sofia, Bulgaria",
  classification = "B1130B (Computer-aided circuit analysis and design);
                 C5210B (Computer-aided logic design); C6150N
                 (Distributed systems software); C7410D (Electronic
                 engineering computing)",
  keywords =     "Digital circuits; Distributed digital logic
                 simulation; Logic circuit simulation; Logical
                 simulation algorithms; Parallel Virtual Machine",
  thesaurus =    "Digital simulation; Distributed processing; Logic
                 CAD",
}
@InProceedings{Boryczko:1995:NIC,
  author =       "I. Boryczko and J. Kitowski and J. Moscinski and A.
                 Leszczynski",
  title =        "Numerically intensive computing as a benchmark for
                 parallel computer architectures",
  crossref =     "Hertzberger:1995:HPM",
  pages =        "118--123",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Inst. of Comput. Sci., Cracow, Poland",
  classification = "C4100 (Numerical analysis); C5220P (Parallel
                 architecture); C5440 (Multiprocessing systems); C5470
                 (Performance evaluation and testing); C5620L (Local
                 area networks); C5670 (Network performance); C7320
                 (Physics and chemistry computing)",
  keywords =     "Computer network; Execution time; Multiprocessors;
                 Numerically intensive computing; Parallel
                 architectures; Parallel computer architecture
                 benchmark; PVM environment; Vector supercomputers",
  thesaurus =    "Local area networks; Molecular dynamics method;
                 Multiprocessing systems; Parallel architectures;
                 Performance evaluation; Physics computing; Vector
                 processor systems",
}
@InProceedings{Branca:1995:CBH,
  author =       "A. Branca and M. Ianigro and A. Distante",
  title =        "A comparison between {HPF} and {PVM} for data parallel
                 algorithms on a cluster of workstations using a high
                 speed network",
  crossref =     "Hertzberger:1995:HPM",
  pages =        "930--931",
  year =         "1995",
  bibdate =      "Thu Feb 29 17:59:11 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Article{Briscolini:1995:PID,
  author =       "M. Briscolini",
  title =        "A parallel implementation of a {3-D} pseudospectral
                 based code on the {IBM 9076} scalable {POWER} parallel
                 system",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "21",
  number =       "11",
  pages =        "1849--1862",
  day =          "29",
  month =        nov,
  year =         "1995",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Apr 14 12:05:41 MDT 1997",
  bibsource =    "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1995&volume=21&issue=11;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1995&volume=21&issue=11&aid=1027",
  acknowledgement = ack-nhfb,
  classification = "A4725 (Turbulent flows, convection, and heat
                 transfer); B0290Z (Other numerical methods); C4190
                 (Other numerical methods); C5440 (Multiprocessing
                 systems); C7310 (Mathematics computing); C7320 (Physics
                 and chemistry computing)",
  corpsource =   "IBM ECSEC, Eur. Center for Sci. and Eng. Comput.,
                 Roma, Italy",
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
  keywords =     "3-D; 3-D pseudospectral based code; 9076 scalable
                 POWERparallel system; architecture; computational
                 kernels; computing; D FFTs; fast Fourier transforms;
                 high intensive numerical simulations; homogeneous
                 turbulent flows; IBM; implementations; mathematics
                 computing; message; message passing; MPL; numerical
                 analysis; parallel 3-; parallel distributed memory;
                 parallel implementation; parallel interfaces; parallel
                 processing; passing; physics; PVMe; turbulence",
  treatment =    "A Application; P Practical",
}
@TechReport{Bruck:1995:EMPa,
  author =       "Jehoshua Bruck",
  title =        "Efficient message passing interface ({MPI}) for
                 parallel computing on clusters of workstations",
  type =         "Research report",
  number =       "RJ 9925 (87305)",
  institution =  inst-IBM-WATSON,
  address =      inst-IBM-WATSON:adr,
  pages =        "31",
  year =         "1995",
  bibdate =      "Mon Jan 15 15:32:53 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Parallel computing on clusters of workstations and
                 personal computers has very high potential, since it
                 leverages existing hardware and software. Parallel
                 programming environments offer the user a convenient
                 way to express parallel computation and communication.
                 In fact, recently, a Message Passing Interface (MPI)
                 has been proposed as an industrial standard for writing
                 `portable' message-passing parallel programs. The
                 communication part of MPI consists of the usual
                 point-to-point communication as well as collective
                 communication. However, existing implementations of
                 programming environments for clusters are built on top
                 of a point-to-point communication layer (send and
                 receive) over local area networks (LANs) and, as a
                 result, suffer from poor performance in the collective
                 communication part. In this paper, we present an
                 efficient design and implementation of the collective
                 communication part in MPI that is optimized for
                 clusters of workstations. Our system consists of two
                 main components: the MPI-CCL layer that includes the
                 collective communication functionality of MPI and a
                 User-level Reliable Transport Protocol (URTP) that
                 interfaces with the LAN Data-link layer and leverages
                 the fact that the LAN is a broadcast medium. Our system
                 is integrated with the operating system via an
                 efficient kernel extension mechanism that we developed.
                 The kernel extension significantly improves the
                 performance of our implementation as it can handle part
                 of the communication overhead without involving user
                 space. We have implemented our system on a collection
                 of IBM RS/6000 workstations connected via a 10Mbit
                 Ethernet LAN. Our performance measurements are taken
                 from real scientific applications that run in a parallel
                 mode by means of the MPI. The hypothesis behind our
                 design is that system's performance will be bounded by
                 interactions between the kernel and user space rather
                 than by the bandwidth delivered by the LAN Data-Link
                 Layer. Our results indicate that the performance of our
                 MPI Broadcast (on top of Ethernet) is about twice as
                 fast as a recently published software implementation of
                 broadcast on top of ATM.",
  acknowledgement = ack-nhfb,
  annote =       "December 13, 1995.",
  institutes =   "IBM Research Division",
  keywords =     "Computer interfaces",
}
@InProceedings{Bruck:1995:EMPb,
  author =       "Jehoshua Bruck and Danny Dolev and Ching-Tien Ho and
                 Marcel-Catalin Rosu and Ray Strong",
  title =        "Efficient {Message Passing Interface} ({MPI}) for
                 Parallel Computing on Clusters of Workstations",
  crossref =     "ACM:1995:SAA",
  pages =        "64--73",
  year =         "1995",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "Compendex database;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "An efficient design and implementation of the
                 collective communication part in a Message Passing
                 Interface (MPI) that is optimized for clusters of
                 workstations is described. The system which consists of
                 two main components, the MPI-CCL layer and a User-level
                 Reliable Transport Protocol (URTP), is integrated with
                 the operating system via an efficient kernel extension
                 mechanism. The system is then implemented on a
                 collection of IBM RS\slash 6000 workstations connected
                 via a 10Mbit Ethernet LAN. Results indicate that the
                 performance of the MPI Broadcast (on top of Ethernet)
                 is about twice as fast as a recently published software
                 implementation of broadcast on top of ATM.",
  acknowledgement = ack-nhfb,
  affiliation =  "California Inst of Technology",
  affiliationaddress = "Pasadena, CA, USA",
  classification = "716.1; 722.2; 722.3; 722.4; 723.1; C5470
                 (Performance evaluation and testing); C5610N (Network
                 interfaces); C5620L (Local area networks); C5640
                 (Protocols); C5670 (Network performance); C6110P
                 (Parallel programming); C6115 (Programming support);
                 C6150N (Distributed systems software)",
  conference =   "Proceedings of the 7th Annual ACM Symposium on
                 Parallel Algorithms and Architectures, SPAA'95",
  conftitle =    "Proceedings of Seventh Annual ACM Symposium on
                 Parallel Algorithms and Architectures",
  corpsource =   "California Inst. of Technol., Pasadena, CA, USA",
  journalabr =   "Annu ACM Symp Parallel Algorithms Archit",
  keywords =     "10 Mbit/s; application program interfaces; broadcast
                 medium; Broadcast medium; collective communication;
                 Collective communication; Communication overhead;
                 communication overhead; Computer operating systems;
                 Computer software portability; Computer systems
                 programming; Computer workstations; Data communication
                 systems; Ethernet; Ethernet LAN; IBM RS/6000
                 workstations; industrial standard; Industrial standard;
                 Interfaces (computer); Kernel extension mechanism;
                 kernel extension mechanism; LAN Data link-layer; LAN
                 data-link layer; Local area networks; local area
                 networks; message passing; Message passing interface;
                 message passing interface; MPI CCL layer; MPI-CCL
                 layer; network interfaces; Network protocols; operating
                 system; Operating system; parallel computing; Parallel
                 computing; Parallel processing systems; Parallel
                 programming; parallel programming; Performance;
                 performance; performance evaluation; Personal
                 computers; Point-to-point communication; point-to-point
                 communication; portable message-passing parallel
                 programs; Portable message-passing parallel programs;
                 Program processors; programming environments;
                 Programming environments; scientific programs;
                 Scientific programs; software libraries; software
                 portability; Systems analysis; transport protocols;
                 URTP; user space; User space; User-level reliable
                 transport protocol; user-level reliable transport
                 protocol; workstation clusters; Workstation clusters;
                 workstations",
  meetingaddress = "Santa Barbara, CA, USA",
  meetingdate =  "Jul 17--19 1995",
  meetingdate2 = "07/17--19/95",
  numericalindex = "Bit rate 1.0E+07 bit/s",
  sponsor =      "ACM SIGACT; ACM SIGARCH; EATCS",
  sponsororg =   "ACM; EATCS",
  thesaurus =    "Application program interfaces; Local area networks;
                 Message passing; Network interfaces; Parallel
                 programming; Performance evaluation; Programming
                 environments; Software libraries; Software portability;
                 Transport protocols; Workstations",
  treatment =    "P Practical",
}
@InProceedings{Bubeck:1995:DSC,
  author =       "T. Bubeck and M. Hiller and W. Kuchlin and W.
                 Rosenstiel",
  title =        "Distributed symbolic computation with {DTS}",
  crossref =     "Ferreira:1995:PAI",
  pages =        "231--248",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Wilhelm-Schickard-Inst. fur Inf., Tubingen Univ.,
                 Germany",
  classification = "C4130 (Interpolation and function approximation);
                 C4240P (Parallel programming and algorithm theory);
                 C6110P (Parallel programming); C6115 (Programming
                 support); C6130S (Data security); C6150N (Distributed
                 systems software)",
  keywords =     "Anonymous compute servers; Asynchronous RPC
                 abstraction; C threads interface; Cryptosystem;
                 Distributed symbolic computation; Distributed threads
                 system; DTS; Fork/join parallel programming; Highly
                 data-dependent algorithm parallelisation; Irregular
                 algorithm parallelisation; Multiprocessor workstation;
                 Multithreading; Parallel long integer multiplication;
                 Parallel multi-variate polynomial resultant
                 computation; Performance results; Programming
                 environment; PVM; Shared memory threads",
  thesaurus =    "Arithmetic; Cryptography; Distributed memory systems;
                 Multiprocessing programs; Multiprocessing systems;
                 Parallel algorithms; Parallel programming; Polynomials;
                 Programming environments; Remote procedure calls;
                 Shared memory systems; Software performance evaluation;
                 Symbol manipulation; Workstations",
}
@Article{Bunge:1995:MCM,
  author =       "Hans-Peter Bunge and John R. Baumgardner",
  title =        "Mantle convection modeling on parallel virtual
                 machines",
  journal =      j-COMPUT-PHYS,
  volume =       "9",
  number =       "2",
  pages =        "207--??",
  month =        mar,
  year =         "1995",
  CODEN =        "CPHYE2",
  DOI =          "https://doi.org/10.1063/1.168525",
  ISSN =         "0894-1866 (print), 1558-4208 (electronic)",
  ISSN-L =       "0894-1866",
  bibdate =      "Wed Apr 10 08:45:53 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/computphys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "https://aip.scitation.org/doi/10.1063/1.168525",
  acknowledgement = ack-nhfb,
  ajournal =     "Comput. Phys",
  fjournal =     "Computers in Physics",
  journal-URL =  "https://aip.scitation.org/journal/cip",
}
@InProceedings{Carreira:1995:DEL,
  author =       "J. Carreira and L. Silva and J. G. Silva",
  title =        "On the design of {Eilean}: a {Linda-like} library for
                 {MPI}",
  crossref =     "IEEE:1995:PSP",
  pages =        "175--184",
  year =         "1995",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Coimbra Univ., Portugal",
  classification = "C6110B (Software engineering techniques); C6110P
                 (Parallel programming); C6115 (Programming support);
                 C6140D (High level languages)",
  conftitle =    "Proceedings Scalable Parallel Libraries Conference",
  corpsource =   "Coimbra Univ., Portugal",
  keywords =     "access policies; Access policies; communication
                 system; Communication system; distribution policies;
                 Distribution policies; Eilean; hierarchical
                 distribution; Hierarchical distribution; hierarchical
                 partitioning scheme; Hierarchical partitioning scheme;
                 Linda; Linda-like library; message passing; message
                 passing standard; Message passing standard; MPI;
                 parallel languages; parallel library; Parallel library;
                 parallel programming; programming paradigm; Programming
                 paradigm; run-time system; Run-time system; software
                 libraries; software library; Software library; software
                 portability; Software portability; tuple mapping task;
                 Tuple mapping task; tuple space; Tuple space",
  sponsororg =   "Mississippi State Univ.; NSF",
  thesaurus =    "Message passing; Parallel languages; Parallel
                 programming; Software libraries; Software portability",
  treatment =    "P Practical",
}
@TechReport{Casanova:1995:PPM,
  author =       "Henri Casanova and Jack Dongarra and Weicheng Jiang",
  title =        "The Performance of {PVM} on {MPP} Systems",
  type =         "Technical report",
  institution =  inst-UTK,
  address =      inst-UTK:adr,
  month =        aug,
  year =         "1995",
  bibdate =      "Tue Feb 26 10:10:44 2002",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.netlib.org/utk/papers/pvmmpp.ps;
                 http://www.netlib.org/utk/papers/pvmmpp/pvmmpp.html;
                 http://www.netlib.org/utk/people/JackDongarra/pdf/pvmmpp.pdf",
  acknowledgement = ack-nhfb,
}
@Article{Casas:1995:MMT,
  author =       "Jeremy Casas and Dan L. Clark and Ravi Konuru and
                 Steve W. Otto and Robert M. Prouty and Jonathan
                 Walpole",
  title =        "{MPVM}: a Migration Transparent Version of {PVM}",
  journal =      j-COMP-SYS,
  volume =       "8",
  number =       "2",
  pages =        "171--216",
  month =        "Spring",
  year =         "1995",
  CODEN =        "CMSYE2",
  ISSN =         "0895-6340",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Oregon Graduate Inst. of Sci. and Technol., Beaverton,
                 OR, USA",
  classification = "C5440 (Multiprocessing systems); C6150N (Distributed
                 systems software); C7430 (Computer engineering)",
  corpsource =   "Oregon Graduate Inst. of Sci. and Technol., Beaverton,
                 OR, USA",
  fjournal =     "Computing Systems",
  keywords =     "Dynamic process migration; dynamic process migration;
                 general-; General-purpose workstation environments;
                 Idle-cycles; idle-cycles; message passing; message-;
                 Message-passing parallel machine; Migratable PVM;
                 Migration-transparent version; migration-transparent
                 version; MPVM; Off-loading; off-loading; Parallel
                 computations; parallel computations; parallel machines;
                 parallel programming; Parallel Virtual Machine; passing
                 parallel machine; Performance; performance; purpose
                 workstation environments; software performance
                 evaluation; Unix; UNIX-based computers; virtual
                 machines; workstations",
  thesaurus =    "Message passing; Parallel machines; Parallel
                 programming; Software performance evaluation; Unix;
                 Virtual machines; Workstations",
  treatment =    "P Practical",
}
@InProceedings{Cavender:1995:APN,
  author = "Mark E. Cavender and Xiaodong Zhang",
  title = "Asynchronous {PVM} Network Computing",
  crossref = "Bailey:1995:PSS",
  pages = "772--773",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "High Performance Comput. and Software Lab., Texas
    Univ., San Antonio, TX, USA",
  classification = "C5620L (Local area networks); C5640 (Protocols);
    C6150N (Distributed systems software)",
  corpsource = "High Performance Comput. and Software Lab., Texas
    Univ., San Antonio, TX, USA",
  keywords = "asynchronous PVM network computing; Asynchronous PVM
    network computing; blocking; Blocking; incoming message
    buffer; Incoming message buffer; interrupt; Interrupt;
    local area networks; message passing; Message passing;
    performance penalty; Performance penalty; processors;
    Processors; program; Program; protocols; PVM daemon;
    receiver; Receiver; stop and wait protocol; Stop and
    wait protocol; synchronized operation; Synchronized
    operation; user program; User program",
  thesaurus = "Local area networks; Message passing; Protocols",
  treatment = "T Theoretical or Mathematical",
}
@inproceedings{Cavender:1995:SSA,
  author             = {Mark E. Cavender and Xiaodong Zhang},
  title              = {Software support for asynchronous computing across
                        networks},
  crossref           = {IEEE:1995:PNA},
  pages              = {376--382},
  year               = {1995},
  coden              = {PSICD2},
  issn               = {0730-6512},
  bibdate            = {Fri May 24 09:58:00 MDT 1996},
  bibsource          = {Compendex database;
                        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note               = {IEEE catalog number 95CB35838.},
  abstract           = {This paper describes the design and implementation of
                        asynchronous communication library routines for
                        distributed computing across networks of workstations.
                        The new system is based on modifications of the
                        existing PVM message-passing environment. An intensive
                        and comparative study of synchronous, asynchronous and
                        non-blocking communication protocols is addressed in
                        terms of their design, implementation and applications.
                        Experimental performance comparisons of an application
                        program using the three communication protocols on a
                        network of workstations, are also presented. The
                        experimental results show the power of the asynchronous
                        communication library and the effective enhancements of
                        the PVM message-passing environment.},
  acknowledgement    = ack-nhfb,
  affiliation        = {Univ of Texas at San Antonio},
  affiliationaddress = {San Antonio, TX, USA},
  classification     = {722.2; 722.4; 723.1; 723.5; C5620L (Local area
                        networks); C5640 (Protocols); C6110B (Software
                        engineering techniques); C6115 (Programming support);
                        C6150N (Distributed systems software)},
  conference         = {Proceedings of the 19th Annual International Computer
                        Software and Applications Conference COMPSAC '95},
  journalabr         = {Proc IEEE Comput Soc Int Comput Software Appl Conf},
  keywords           = {Application program; Asynchronous communication
                        library routine design; Asynchronous communication
                        library routines; Asynchronous communication protocols;
                        Asynchronous computing; Computer aided software
                        engineering; Computer networks; Computer workstations;
                        Data communication systems; Distributed computer
                        systems; Distributed computing; Modified PVM
                        message-passing environment; Network protocols;
                        Nonblocking communication protocols; Parallel virtual
                        machine (PVM) message passing environment; Performance
                        comparisons; Software support; Synchronous
                        communication protocols; Workstation network},
  meetingaddress     = {Dallas, TX, USA},
  meetingdate        = {Aug 9--11 1995},
  meetingdate2       = {08/09--11/95},
  sponsor            = {IEEE},
  thesaurus          = {Local area networks; Message passing; Network
                        operating systems; Operating systems [computers];
                        Protocols; Software libraries; Software performance
                        evaluation; Workstations},
}
@InProceedings{Chamaret:1995:PFE,
  author = "B. Chamaret and H. Cherifi and S. Ub{\'e}da",
  title = "Parallel filter estimation maximisation algorithm for
    segmentation on a {LAN} of workstation",
  crossref = "Bailey:1995:PSS",
  pages = "68--69",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "TSI Lab., Univ. Jean-Monnet, Saint-Etienne, France",
  classification = "B6140C (Optical information, image and video signal
    processing); B6210L (Computer communications); C1250
    (Pattern recognition); C4240P (Parallel programming and
    algorithm theory); C5260B (Computer vision and image
    processing techniques); C5620L (Local area networks)",
  keywords = "Bayesian segmentation algorithm; Grey level images;
    Image segmentation; LAN of workstation; Parallel filter
    estimation maximisation algorithm; Parallel Virtual
    Machine package; Portable parallel application",
  thesaurus = "Bayes methods; Image segmentation; Local area
    networks; Parallel algorithms",
  xxauthor = "B. Chamaret and H. Cherefi and S. Ubeda",
  xxnote = "Author spellings Cherifi and Ub{\'e}da replace the
    database spellings recorded in xxauthor; verify against
    the printed proceedings.",
}
@InProceedings{Chang:1995:EPCa,
  author = "Sheue-Ling Chang and David Hung-Chang Du and Jenwei
    Hsieh and Mengjou Lin",
  title = "Enhanced {PVM} Communications Over a High-Speed Local
    Area Network",
  crossref = "Alnuweiri:1995:PHF",
  pages = "37--46",
  year = "1995",
  bibdate = "Thu Feb 29 17:59:11 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Article{Chang:1995:EPCb,
  author = "Sheue-Ling Chang and David Hung-Chang Du and Jenwei
    Hsieh and Rose P. Tsang and Mengjou Lin",
  title = "Enhanced {PVM} Communications over a {High-Speed
    LAN}",
  journal = j-IEEE-PAR-DIST-TECH,
  volume = "3",
  number = "3",
  pages = "20--32",
  month = "Fall",
  year = "1995",
  CODEN = "IPDTEX",
  DOI = "https://doi.org/10.1109/M-PDT.1995.414841",
  ISSN = "1063-6552 (print), 1558-1861 (electronic)",
  ISSN-L = "1063-6552",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "Compendex database;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "Performance results of PVM over a local ATM network
    show the availability of much greater communication
    bandwidth over traditional LANs such as Ethernet.
    Realizing the full potential of high-speed networks,
    therefore, will require further improvements in both
    hardware and software components of network I/O
    subsystems.",
  abstract2 = "Enhanced Parallel Virtual Machine (PVM) communications
    over a high speed local area network is described.
    Performance results of PVM over a local asynchronous
    transfer mode (ATM) show the availability of much
    greater communication bandwidth over traditional LANs.
    Application-level performance, however, still lags far
    behind the capabilities of the physical medium.
    Realizing the full potential of high-speed networks,
    therefore, will require further improvements in both
    hardware and software components of network input\slash
    output subsystems.",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput. Sci., Univ. of Minnesota",
  affiliationaddress = "Minneapolis, MN, USA",
  classification = "716; 722.2; 722.3; 722.4; 723; 731; B6210L (Computer
    communications); B6230 (Switching centres and
    equipment); C5620L (Local area networks); C5670
    (Network performance)",
  corpsource = "Dept. of Comput. Sci., Minnesota Univ., Minneapolis,
    MN, USA",
  fjournal = "IEEE Parallel and Distributed Technology: Systems and
    Applications",
  journalabr = "IEEE Parallel Distrib Technol",
  keywords = "application-level performance; Application-level
    performance; Asynchronous transfer mode; asynchronous
    transfer mode; Communication bandwidth; communication
    bandwidth; Computer architecture; Data communication
    systems; evaluation; Fiber distributed data interface;
    high-speed LAN; High-speed LAN; High-speed networks;
    high-speed networks; Interfaces (computer); Local area
    networks; local area networks; local ATM network; Local
    ATM network; Multicasting measurements; Network I/O
    subsystems; network I/O subsystems; Parallel processing
    systems; Parallel virtual machine (PVM); Parallel
    virtual machine (pvm); Performance; performance; PVM
    communications; Systems analysis; systems analysis",
  thesaurus = "Asynchronous transfer mode; Local area networks;
    Performance evaluation",
  treatment = "A Application; P Practical",
}
@inproceedings{Chapple:1995:PUL,
  author          = {S. R. Chapple and L. J. Clarke},
  title           = {The {Parallel Utilities Library}},
  crossref        = {IEEE:1995:PSP},
  pages           = {21--30},
  year            = {1995},
  bibdate         = {Sun Dec 22 10:20:45 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Parallel Comput. Center, Edinburgh Univ., UK},
  classification  = {C5440 (Multiprocessing systems); C6110B (Software
                     engineering techniques); C6110P (Parallel programming);
                     C6150N (Distributed systems software); C7480
                     (Production engineering computing)},
  keywords        = {AEA Technology; Domain decomposition; Industrial
                     applications; Library modules; Message passing;
                     Message-Passing Interface; MPI; Parallel scalable I/O;
                     Parallel systems; Parallel Utilities Library; PUL;
                     Rolls-Royce; Shell UK; Task parallelism; Unstructured
                     mesh applications},
  thesaurus       = {Industries; Message passing; Parallel programming;
                     Software libraries; Software portability; Software
                     reusability; Subroutines},
}
@inproceedings{Clematis:1995:PPH,
  author          = {A. Clematis and B. Falcidieno and D. F. Prieto and M.
                     Spagnuolo},
  title           = {Parallel processing on heterogeneous networks for
                     {GIS} applications},
  crossref        = {Hertzberger:1995:HPM},
  pages           = {67--72},
  year            = {1995},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {IMA-CNR, Genoa, Italy},
  classification  = {C6110B (Software engineering techniques); C6110P
                     (Parallel programming); C6150N (Distributed systems
                     software); C7840 (Geography and cartography
                     computing)},
  keywords        = {Applications parallelization; Geographic information
                     systems; Heterogeneous networks; Linda; Network-based
                     parallel computing; Parallel program development;
                     Performance; Portable communication libraries; PVM;
                     Software portability; Software reusability},
  thesaurus       = {Geographic information systems; Parallel processing;
                     Software libraries; Software portability; Software
                     reusability},
}
@InProceedings{Clemencon:1995:AEP,
  author = "C. Clemencon and A. Endo and J. Fritscher and A.
    Muller and R. R{\"u}hl and B. J. N. Wylie",
  title = "The `{Annai}' environment for portable distributed
    parallel programming",
  crossref = "El-Rewini:1995:PTE",
  pages = "242--251 (vol. 2)",
  year = "1995",
  bibdate = "Sun Dec 22 10:20:45 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Swiss Federal Inst. of Technol., Zurich, Switzerland",
  classification = "C6110B (Software engineering techniques); C6110P
    (Parallel programming); C6115 (Programming support);
    C6150C (Compilers, interpreters and other processors);
    C6150G (Diagnostic, testing, debugging and evaluating
    systems); C6150N (Distributed systems software); C6180
    (User interfaces)",
  keywords = "Annai environment; Application developers; Common user
    interface; Distributed memory parallel processors;
    Dynamic data distributions; Feedback; Functionality
    enhancements; High Performance Fortran compiler;
    High-level data-parallel programming; Interactive
    performance monitor; Language extensions; Low-level
    machine interface; Low-level message-passing
    programming; Message Passing Interface; Performance
    analyzer; Performance results; Portability; Portable
    distributed parallel programming environment;
    Source-level debugger; Target hardware architecture;
    Tool prototypes; Unstructured problem parallelization",
  thesaurus = "Distributed memory systems; FORTRAN; Message passing;
    Parallel programming; Program compilers; Program
    debugging; Program diagnostics; Programming
    environments; Software performance evaluation; Software
    portability; Software tools; User interfaces",
}
@InProceedings{Clemencon:1995:IRD,
  author = "C. Clemencon and J. Fritscher and M. J. Meehan and R.
    R{\"u}hl",
  title = "An Implementation of Race Detection and Deterministic
    Replay with {MPI}",
  crossref = "Haridi:1995:EPP",
  pages = "155--166",
  year = "1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Centro Svizzero de Calcolo Sci., Eidgenossische Tech.
    Hochschule, Manno, Switzerland",
  classification = "C6110P (Parallel programming); C6110S (Software
    metrics); C6115 (Programming support); C6140D (High
    level languages); C6150C (Compilers, interpreters and
    other processors)",
  conftitle = "EURO-PAR '95. Parallel Processing. First International
    EURO-PAR Conference. Proceedings",
  corpsource = "Centro Svizzero de Calcolo Sci., Eidgenossische Tech.
    Hochschule, Manno, Switzerland",
  keywords = "Annai programming environment; Computational
    efficiency; computational efficiency; Data-parallel
    program; data-parallel program; Deterministic replay;
    deterministic replay; FORTRAN; High Performance
    Fortran; HPF; Integrated environment; integrated
    environment; Joint CSCS-ETH/NEC Collaboration; message
    passing; Message-passing program; message-passing
    program; MPI; Parallel Debugging Tool; Parallel
    language; parallel language; parallel programming;
    parallelising compilers; Parallelized MPI program;
    parallelized MPI program; PDT; Program debugging;
    program debugging; Programming environment; programming
    environment; programming environments; programming
    languages; Race detection; race detection; Replaying
    mechanism; replaying mechanism; software metrics;
    Software performance; software performance; software
    performance evaluation; Software tool; software tool;
    Tracing; tracing",
  thesaurus = "FORTRAN; Message passing; Parallel programming;
    Parallelising compilers; Program debugging; Programming
    environments; Programming languages; Software metrics;
    Software performance evaluation",
  treatment = "P Practical",
}
@InProceedings{Cooperman:1995:SBP,
  author = "Gene Cooperman",
  title = "{STAR\slash MPI}: binding a parallel library to
    interactive symbolic algebra systems",
  crossref = "Levelt:1995:IIS",
  pages = "126--132",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Coll. of Comput. Sci., Northeastern Univ., Boston, MA,
    USA",
  classification = "C6110B (Software engineering techniques); C6110P
    (Parallel programming); C6115 (Programming support);
    C7310 (Mathematics computing)",
  keywords = "GCL; GNU Common LISP; Interactive symbolic algebra
    systems; Mathematical group theory; Parallel library;
    STAR/MPI; Symbolic algebra",
  thesaurus = "Parallel programming; Software libraries; Symbol
    manipulation",
  xxnote = "Same paper as entry Cooperman:1995:SMB (identical
    title, crossref, and pages); both keys are retained so
    that existing citations keep working.",
}
@InProceedings{Cooperman:1995:SMB,
  author = "Gene Cooperman",
  title = "{STAR\slash MPI}: Binding a Parallel Library to
    Interactive Symbolic Algebra Systems",
  crossref = "Levelt:1995:IIS",
  pages = "126--132",
  year = "1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "Compendex database;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "This work is aimed at making parallel programming more
    accessible to users of symbolic algebra systems and to
    users of interactive languages in general. This is done
    by integrating MPI (Message Passing Interface), a
    portable, parallel message-passing library, with two
    interactive languages: GCL (GNU Common LISP), and GAP.
    The GAP system includes a general purpose language for
    mathematical group theory, and LISP is the basis for
    several general-purpose symbolic algebra systems. In
    addition, a simple master-slave abstraction is written,
    so that end-users need not learn any of the details of
    the MPI function calls. This work is distinct from past
    studies in that it provides the ability to
    interactively create, test and modify a distributed
    environment using the original interactive language and
    a portable parallel library.",
  acknowledgement = ack-nhfb,
  affiliation = "Northeastern Univ",
  affiliationaddress = "Boston, MA, USA",
  classification = "721.1; 722.2; 722.4; 723.1; 723.5; 921.1; C6110B
    (Software engineering techniques); C6110P (Parallel
    programming); C6115 (Programming support); C7310
    (Mathematics computing)",
  conference = "Proceedings of the 1995 International Symposium on
    Symbolic and Algebraic Computation",
  conftitle = "Proceedings of International Symposium on Symbolic and
    Algebraic Computation. ISSAC '95",
  corpsource = "Coll. of Comput. Sci., Northeastern Univ., Boston, MA,
    USA",
  journalabr = "Int Symp Symbol Algebraic Comput ISSAC Proc",
  keywords = "Algebra; Computational methods; Computer programming;
    Computer programming languages; Computer simulation;
    Computer software; GCL; GNU Common LISP; Interactive
    computer systems; Interactive languages; Interactive
    symbolic algebra systems; interactive symbolic algebra
    systems; Interfaces (computer); mathematical group
    theory; Mathematical techniques; Message passing
    interface; Parallel library; parallel library; Parallel
    processing systems; parallel programming; software
    libraries; STAR/MPI; symbol manipulation; symbolic
    algebra; User interfaces",
  meetingaddress = "Montreal, Can",
  meetingdate = "Jul 10--12 1995",
  meetingdate2 = "07/10--12/95",
  sponsororg = "ACM",
  treatment = "P Practical; T Theoretical or Mathematical",
  xxnote = "Same paper as entry Cooperman:1995:SBP (identical
    title, crossref, and pages); both keys are retained so
    that existing citations keep working.",
}
@InProceedings{Corno:1995:PTA,
  author = "F. Corno and P. Prinetto and M. Rebaudengo and M.
    {Sonza Reorda} and E. Veiluva",
  title = "A {PVM} tool for automatic test generation on parallel
    and distributed systems",
  crossref = "Hertzberger:1995:HPM",
  pages = "39--44",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dipartimento di Autom. e Inf., Politecnico di Torino,
    Italy",
  classification = "B1130B (Computer-aided circuit analysis and design);
    B2210B (Printed circuit layout and design); C5210B
    (Computer-aided logic design); C6110P (Parallel
    programming); C6150N (Distributed systems software);
    C7410D (Electronic engineering computing)",
  corpsource = "Dipartimento di Autom. e Inf., Politecnico di Torino,
    Italy",
  keywords = "ATPG; automatic test generation; Automatic test
    generation; automatic test pattern generation;
    Automatic test pattern generation; automatic test
    software; circuit CAD; CM-5; DEC Alpha AXP farm;
    distributed programming; Distributed programming;
    distributed system; Distributed system; efficient
    algorithm; Efficient algorithm; electric circuit;
    Electric circuit; electrical circuit; Electrical
    circuit; electronic CAD; Electronic CAD; electronic
    circuit; Electronic circuit; GATTO*; genetic algorithm;
    Genetic algorithm; integrated circuit testing; large
    sequential circuits; Large sequential circuits; logic
    CAD; logic testing; parallel architectures; parallel
    programming; Parallel programming; portability;
    Portability; portable message-passing libraries;
    Portable message-passing libraries; PVM tool; software
    libraries; VLSI; VLSI technology",
  pubcountry = "Germany",
  thesaurus = "Automatic test software; Circuit CAD; Integrated
    circuit testing; Logic CAD; Logic testing; Parallel
    architectures; Parallel programming; Software
    libraries; VLSI",
  treatment = "P Practical",
  xxauthor = "F. Corno and P. Prinetto and M. Rebaudeng and M.
    {Sonza Reorda} and E. Veiluva",
}
@Article{DAmbra:1995:CBC,
  author = "P. D'Ambra and G. Giunta",
  title = "Concurrent banded {Cholesky} factorization on
    workstation networks using {PVM}",
  journal = j-PARALLEL-COMPUTING,
  volume = "21",
  number = "3",
  pages = "487--494",
  day = "10",
  month = mar,
  year = "1995",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dipartimento di Matematica e Applicazioni, Naples
    Univ., Italy",
  classification = "C4140 (Linear algebra); C4240P (Parallel programming
    and algorithm theory); C6110P (Parallel programming);
    C7310 (Mathematics computing)",
  corpsource = "Dipartimento di Matematica e Applicazioni, Naples
    Univ., Italy",
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
  keywords = "application programs; Application programs; banded
    symmetric positive-definite matrix; Banded symmetric
    positive-definite matrix; cluster of workstations;
    Cluster of workstations; concurrent banded Cholesky
    factorization; Concurrent banded Cholesky
    factorization; heterogeneous processors; Heterogeneous
    processors; linear algebra; mathematics computing;
    optical fiber links; Optical fiber links; parallel
    programming; Parallel Virtual Machine; software system;
    Software system; virtual machines; workstation
    networks; Workstation networks; workstations",
  pubcountry = "Netherlands",
  thesaurus = "Linear algebra; Mathematics computing; Parallel
    programming; Virtual machines; Workstations",
  treatment = "A Application; P Practical",
}
@InProceedings{Davies:1995:NPE,
  author = "Gregory Davies and Norman Matloff",
  title = "Network-Specific Performance Enhancements for {PVM}",
  crossref = "IEEE:1995:PFI",
  pages = "205--210",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "Compendex database;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "PVM, a message-passing software system for parallel
    processing, is used on a wide variety of processor
    platforms, but this portability restricts execution
    speed. The work here will address this problem mainly
    in the context of Ethernet-based systems, proposing two
    PVM enhancements for such systems. The first
    enhancement exploits the fact that an Ethernet has
    broadcast capability. Since unenhanced PVM must, to
    keep portability, avoid using broadcast, execution
    speed is sacrificed. In addition, the larger the
    system, the larger the sacrifice in speed. A solution
    to this problem is presented. The second enhancement is
    intended for use in applications in which many
    concurrent tasks finish at the same time, and thus
    simultaneously try to transmit to a master process. On
    an Ethernet, this produces excessively long random
    backoffs, reducing program speed. An enhancement,
    termed `programmed backoff,' is proposed.",
  acknowledgement = ack-nhfb,
  affiliation = "Tandem Computers",
  affiliationaddress = "Cupertino, CA, USA",
  classification = "716.1; 722; 722.3; 722.4; 723; 922.2; C5440
    (Multiprocessing systems); C5620L (Local area
    networks); C6150N (Distributed systems software)",
  conference = "Proceedings of the 4th IEEE International Symposium on
    High Performance Distributed Computing",
  journalabr = "IEEE Int Symp High Perform Distrib Comput Proc",
  keywords = "Algorithms; Broadcast capability; Broadcasting;
    Communication channels (information theory); Computer
    hardware; Computer networks; Computer software
    portability; Concurrent tasks; Data communication
    systems; Ethernet-based systems; Hypercube systems;
    Message-passing software system; Network-specific
    performance enhancements; Parallel processing;
    Parallel processing systems; Program speed; Programmed
    backoff; PVM; Statistical methods",
  meetingaddress = "Washington, DC, USA",
  meetingdate = "Aug 2--4 1995",
  meetingdate2 = "08/02--04/95",
  sponsor = "IEEE",
  thesaurus = "Local area networks; Message passing; Parallel
    processing",
  xxnote = "Same paper as entry Davies:1995:NSP (identical title,
    crossref, and pages); both keys are retained so that
    existing citations keep working.",
}
@InProceedings{Davies:1995:NSP,
  author = "Gregory Davies and Norman Matloff",
  title = "Network-specific performance enhancements for {PVM}",
  crossref = "IEEE:1995:PFI",
  pages = "205--210",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C5620L (Local area
    networks); C6150N (Distributed systems software)",
  corpsource = "Tandem Comput. Inc., Cupertino, CA, USA",
  keywords = "broadcast capability; concurrent tasks; Ethernet-based
    systems; local area networks; message passing;
    message-passing software system; network-specific
    performance enhancements; parallel processing; program
    speed; programmed backoff; PVM",
  sponsororg = "IEEE Tech. Committee on Distrib. Process.; Northeast
    Parallel Architectures Centre (NPAC) at Syracuse Univ.;
    ACM SIGCOMM; Rome Lab",
  treatment = "A Application; P Practical",
  xxnote = "Same paper as entry Davies:1995:NPE (identical title,
    crossref, and pages); both keys are retained so that
    existing citations keep working.",
}
@InProceedings{Decker:1995:TDU,
  author = "T. Decker and R. Diekmann and R. L{\"u}ling and B.
    Monien",
  title = "Towards developing universal dynamic mapping
    algorithms",
  crossref = "IEEE:1995:PSI",
  pages = "456--459",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Math. and Comput. Sci., Paderborn Univ.,
    Germany",
  classification = "C5220P (Parallel architecture); C5620 (Computer
    networks and techniques); C6150J (Operating systems)",
  keywords = "Bidding-algorithms; Distributed runtime systems;
    Dynamically generated tasks; Execution-times;
    MIMD-system; MPI; Optimal K-values; PVM; Randomly
    selected processors; Universal dynamic mapping
    algorithms; Universally applicable strategy",
  thesaurus = "Distributed processing; Resource allocation",
}
@techreport{Dongarra:1995:IMS,
  author          = {Jack Dongarra and Steve W. Otto and Marc Snir and
                     David Walker},
  title           = {An Introduction to the {MPI Standard}},
  type            = {Technical report},
  number          = {CS-95-274},
  institution     = inst-UTK,
  address         = inst-UTK:adr,
  month           = jan,
  year            = {1995},
  bibdate         = {Tue Feb 26 10:10:44 2002},
  bibsource       = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {Appears in CACM \cite{Dongarra:1996:MPS}.},
  url             = {http://www.netlib.org/tennessee/ut-cs-95-274.ps;
                     http://www.netlib.org/utk/papers/intro-mpi/intro-mpi.html;
                     http://www.netlib.org/utk/people/JackDongarra/pdf/ut-cs-95-274.pdf},
  acknowledgement = ack-nhfb,
}
@Article{Dongarra:1995:PBC,
  author = "J. J. Dongarra and T. Hey",
  title = "The {ParkBench} benchmark collection",
  journal = j-SUPERCOMPUTER,
  volume = "11",
  number = "2--3",
  pages = "94--114",
  month = jun,
  year = "1995",
  CODEN = "SPCOEL",
  ISSN = "0168-7875",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN,
    USA",
  classification = "C5440 (Multiprocessing systems); C6150C (Compilers,
    interpreters and other processors)",
  fjournal = "Supercomputer",
  keywords = "Application kernels; Compact research applications;
    Hierarchical structure; Low-level benchmarks; ParkBench
    benchmark collection; Performance characteristics;
    Synthetic compiler benchmark suite",
  pubcountry = "Netherlands",
  thesaurus = "Parallel processing; Program compilers",
}
@inproceedings{Dowaji:1995:LBS,
  author          = {S. Dowaji and C. Roucairol},
  title           = {Load balancing strategy and priority of tasks in
                     distributed environments},
  crossref        = {IEEE:1995:CPI},
  pages           = {15--22},
  year            = {1995},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Lab. PRiSM, Univ. de Versailles-St-Quentin, France},
  classification  = {C1160 (Combinatorial mathematics); C1180
                     (Optimisation techniques); C4240P (Parallel programming
                     and algorithm theory); C6150N (Distributed systems
                     software)},
  keywords        = {Branch and bound algorithms; Distributed environments;
                     Graph theory; Load balancing; Lower bound; VCP},
  thesaurus       = {Combinatorial mathematics; Distributed algorithms;
                     Optimisation; Resource allocation},
}
@article{Dragovitsch:1995:PPS,
  author             = {P. Dragovitsch and X. Zhao and L. C. Dennis and G. A.
                        Riccardi},
  title              = {{PVMGeant} --- a Parallel Simulation Code for the
                        {CLAS} Detector at {CEBAF}},
  journal            = j-IJSAHPC,
  volume             = {9},
  number             = {2},
  pages              = {128--137},
  month              = {Summer},
  year               = {1995},
  coden              = {IJSCFG},
  issn               = {1078-3482},
  bibdate            = {Tue Feb 18 09:07:32 MST 1997},
  bibsource          = {Compendex database;
                        http://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover
                        library database},
  abstract           = {Due to the need for extensive and detailed simulations
                        of the CEBAF Large Acceptance Spectrometer (CLAS), the
                        Monte-Carlo code CLASGeant was transferred to a
                        heterogeneous computing cluster and has been linked to
                        the Parallel Virtual Machine (PVM) message-passing
                        library. The resulting simulation package, PvmGeant,
                        achieves an almost linear speedup in physics-event
                        simulation. This article describes modifications to the
                        original GEANT code, its integration with PVM, and
                        performance tests that were conducted at the computing
                        cluster at The Supercomputing Computations Research
                        Institute at Florida State University. Particular
                        attention has been given to measuring the effect of
                        different data structures on the cost of network
                        communication between nodes.},
  acknowledgement    = ack-nhfb,
  affiliation        = {Supercomput. Comput. Res. Inst., Florida State Univ.},
  affiliationaddress = {Tallahassee, FL, USA},
  classification     = {722.4; 723.1; 723.2; 723.5; 922.2; 941.3},
  fjournal           = {International Journal of Supercomputer Applications
                        and High Performance Computing},
  journalabr         = {Int J Supercomput Appl High Perform Comput},
  keywords           = {CEBAF large acceptance spectrometer (CLAS);
                        Computational complexity; Computer networks; Computer
                        simulation; Computer software; Computing cluster;
                        Continuous electron beam accelerator facility (CEBAF);
                        Data communication systems; Data structures; Message
                        passing library; Monte Carlo methods; Parallel
                        processing systems; Parallel virtual machine; Software
                        package CLASGeant; Software package PvmGeant;
                        Spectrometers},
}
@inproceedings{Edjlali:1995:DPP,
  author          = {G. Edjlali and G. Agrawal and A. Sussman and J.
                     Saltz},
  title           = {Data parallel programming in an adaptive environment},
  crossref        = {IEEE:1995:PIP},
  pages           = {827--832},
  year            = {1995},
  bibdate         = {Sun Dec 22 10:20:45 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation     = {Dept. of Comput. Sci., Maryland Univ., College Park,
                     MD, USA},
  classification  = {C6110P (Parallel programming); C6115 (Programming
                     support)},
  keywords        = {Adaptive environment; Communication patterns; Data
                     parallel programming; Data redistribution; Loop bounds;
                     Message passing; Multiblock Navier--Stokes solver;
                     Network of workstations; Performance results; PVM;
                     Runtime library; Runtime support},
  thesaurus       = {Message passing; Parallel programming; Programming
                     environments},
}
@Article{Fan:1995:DMP,
  author = "W. C. Fan and J. A. {Halbleib, Sr.}",
  title = "Distributed multitasking {ITS} with {PVM}",
  journal = j-TRANS-AM-NUCL-SOC,
  volume = "72",
  number = "????",
  pages = "146--147",
  month = "????",
  year = "1995",
  CODEN = "TANSAO",
  ISSN = "0003-018X",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Sandia Nat. Labs., Albuquerque, NM, USA",
  classification = "A0250 (Probability theory, stochastic processes, and
    statistics); A0540 (Fluctuation phenomena, random
    processes, and Brownian motion); A2820H (Neutron
    diffusion); C1140G (Monte Carlo methods); C7470
    (Nuclear engineering computing)",
  conflocation = "Philadelphia, PA, USA; 25--29 June 1995",
  conftitle = "1995 Annual Meeting of American Nuclear Society
    (papers in summary form only received)",
  corpsource = "Sandia Nat. Labs., Albuquerque, NM, USA",
  fjournal = "Transactions of the American Nuclear Society",
  keywords = "distributed multitasking ITS; Distributed multitasking
    ITS; ITS Version 3.0; Monte Carlo methods; neutron
    transport theory; nuclear engineering computing; PVM
    communication software; transport codes; Transport
    codes",
  thesaurus = "Monte Carlo methods; Neutron transport theory; Nuclear
    engineering computing",
  treatment = "P Practical; T Theoretical or Mathematical",
}
@InProceedings{Fang:1995:PMS,
author = "Niandong Fang and H. Burkhart",
title = "{PEMPI} --- from {MPI} standard to programming
environment",
crossref = "IEEE:1995:PSP",
pages = "31--38",
year = "1995",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Inf., Basel Univ., Switzerland",
classification = "C5440 (Multiprocessing systems); C6110P (Parallel
programming); C6115 (Programming support); C6150N
(Distributed systems software)",
conftitle = "Proceedings Scalable Parallel Libraries Conference",
corpsource = "Dept. of Inf., Basel Univ., Switzerland",
keywords = "Basel Algorithm Classification Scheme; higher
abstractions; Higher abstractions; integrated
environment; Integrated environment; large scale
message passing applications; Large scale message
passing applications; machine best-fit implementation;
Machine best-fit implementation; message passing;
Message Passing Interface; message passing programs;
Message passing programs; message passing systems;
Message passing systems; MPI standard; parallel
programming; parallel programs; Parallel programs;
PEMPI; portability; Portability; programmability;
Programmability; programmer oriented abstractions;
Programmer oriented abstractions; programming
environment; Programming environment; programming
environments; software standards; software tools;
system-oriented level; System-oriented level; widely
used standard; Widely used standard",
sponsororg = "Mississippi State Univ.; NSF",
thesaurus = "Message passing; Parallel programming; Programming
environments; Software standards; Software tools",
treatment = "P Practical",
}
@InProceedings{Ferrari:1995:TDC,
author = "A. J. Ferrari and V. S. Sunderam",
title = "{TPVM}: distributed concurrent computing with
lightweight processes",
crossref = "IEEE:1995:PFI",
pages = "211--218",
year = "1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Virginia Univ.,
Charlottesville, VA, USA",
classification = "C5440 (Multiprocessing systems); C6150N (Distributed
systems software)",
keywords = "Data dependencies; Data-driven scheduling model;
Distributed concurrent computing; Experimental
auxiliary subsystem; Explicit message passing model;
Library interface; Lightweight processes; Load balance;
Parallelism; Processor utilization; Scheduling;
SPMD-style algorithms; Threads-oriented PVM; TPVM",
thesaurus = "Message passing; Parallel processing; Scheduling",
}
@Article{Fineberg:1995:IMM,
author = "Samuel A. Fineberg",
title = "Implementing multidisciplinary and multi-zonal
applications using {MPI}",
journal = j-FRONTIERS-MASS-PAR-COMP-CONF-PROC,
pages = "496--503",
month = "????",
year = "1995",
bibdate = "Fri May 24 09:57:40 MDT 1996",
bibsource = "Compendex database;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog number 95TH8024.",
abstract = "Multidisciplinary and multi-zonal applications are
codes where two or more distinct parallel programs or
copies of a single program are utilized to model a
single problem. To support such applications, a program
can be divided into several single program multiple
data stream (SPMD) applications, each of which solves
the equations for a single physical discipline or grid
zone. These applications are bound together to form a
single multidisciplinary or multi-zonal program in
which the constituent parts communicate via
point-to-point message passing routines. In this report
it is shown that the new Message Passing Interface
(MPI) standard is a viable portable library for
implementing the message passing portion of
multidisciplinary applications. Further, with the
extension of a portable loader, fully portable
multidisciplinary application programs can be
developed. Finally, the performance of MPI is compared
to that of some native message passing libraries. This
comparison shows that MPI can be implemented to deliver
performance commensurate with native message passing
libraries.",
acknowledgement = ack-nhfb,
affiliation = "NASA Ames Research Cent",
affiliationaddress = "Moffett Field, CA, USA",
classification = "722.2; 722.3; 722.4; 723.1; 723.2; 921.6",
conference = "Proceedings of the 5th Symposium on the Frontiers of
Massively Parallel Computation",
fjournal = "Frontiers of Massively Parallel Computation ---
Conference Proceedings",
journalabr = "Front Massively Parallel Comput Conf Proc",
keywords = "Codes (symbols); Computational methods; Computer
software; Computer software portability; Data
communication systems; Data handling; Interfaces
(computer); Mathematical models; Message passing;
Multidisciplinary program; Multiprogramming; Multizonal
program; Parallel processing systems; Resource
allocation; Single program multiple data stream;
Storage allocation (computer); Supervisory and
executive programs",
meetingaddress = "McLean, VA, USA",
meetingdate = "Feb 6--9 1995",
meetingdate2 = "02/06--09/95",
sponsor = "IEEE Computer Society",
}
@InProceedings{Ford:1995:NNN,
author = "Brian Ford",
title = "The New {NAG} Numerical {PVM} Library (or {A} New
Parallel Numerical Library Based on {PVM})",
crossref = "IFIP:1995:KWC",
pages = "??--??",
year = "1995",
bibdate = "Wed Jan 24 07:11:31 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.nsc.liu.se/~boein/ifip/kyoto/workshop-info/proceedings/ford/ford1.html",
acknowledgement = ack-nhfb,
}
@InProceedings{Franke:1995:AAV,
author = "E. A. Franke and S. D. Huffman and W. M. Carter and J.
P. Baumgartner and D. J. Wenzel",
title = "{AVTP} --- an architecture for visualization using
remote parallel\slash distributed computing",
crossref = "Grinstein:1995:VDE",
journal = j-PROC-SPIE,
volume = "2410",
pages = "230--237",
year = "1995",
CODEN = "PSISDG",
ISSN = "0277-786X (print), 1996-756X (electronic)",
ISSN-L = "0277-786X",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Southwest Res. Inst., San Antonio, TX, USA",
classification = "C6130B (Graphics techniques); C6150N (Distributed
systems software)",
fjournal = "Proceedings of the SPIE --- The International Society
for Optical Engineering",
keywords = "Advanced Visualization Technology Project; AVTP; Data
cache server; High speed data networks; Image
generation library; Image specification toolset;
Message passing; Parallel processor machines; PVM;
Remote computer resources; Remote distributed
computing; Remote parallel computing; Research and
development; Scalable computing; Shared memory;
Streamlines; Surfaces; System architectures; Vector
fields; Visualization architecture; Visualization
tools",
thesaurus = "Cache storage; Data visualisation; File servers;
Message passing; Multiprocessing programs",
}
@InProceedings{Franke:1995:MIS,
author = "H. Franke and P. Hochschild and P. Pattnaik and J.-P.
Prost and M. Snir",
title = "{MPI} on {IBM SP1\slash SP2}: current status and
future directions",
crossref = "IEEE:1995:PSP",
pages = "39--48",
year = "1995",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "IBM Thomas J. Watson Res. Center, Yorktown Heights,
NY, USA",
classification = "C5440 (Multiprocessing systems); C6110B (Software
engineering techniques); C6110P (Parallel programming);
C6150N (Distributed systems software)",
conftitle = "Proceedings Scalable Parallel Libraries Conference",
corpsource = "IBM Thomas J. Watson Res. Center, Yorktown Heights,
NY, USA",
keywords = "distributed memory systems; future directions; Future
directions; IBM computers; IBM Scalable Power PARALLEL
1; IBM Scalable Power PARALLEL 2; IBM SP1/SP2; initial
performance measurements; Initial performance
measurements; message passing; MPI; native EUI library;
Native EUI library; parallel programming; prototype
implementation; Prototype implementation; software
libraries; software standards",
sponsororg = "Mississippi State Univ.; NSF",
thesaurus = "Distributed memory systems; IBM computers; Message
passing; Parallel programming; Software libraries;
Software standards",
treatment = "P Practical",
}
@TechReport{Franke:1995:MPEa,
author = "Hubertus Franke",
title = "{MPI} programming environment for {IBM SP1\slash
SP2}",
type = "Research report",
number = "RC 19991 (88480)",
institution = inst-IBM-WATSON,
address = inst-IBM-WATSON:adr,
pages = "9",
year = "1995",
bibdate = "Mon Jan 15 15:32:53 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "In this paper we discuss an implementation of the
Message Passing Interface standard (MPI) for the IBM
Scalable Power PARALLEL 1 and 2 (SP1, SP2). Key to a
reliable and efficient implementation of a message
passing library on these machines is the careful design
of a UNIX-Socket like layer in the user space with
controlled access to the communication adapters and
with adequate recovery and flow control. The
performance of this implementation is at the same level
as the IBM-proprietary message passing library (MPL).
We also show that in the IBM SP1 and SP2 we achieve
integrated tracing ability, where both system events,
such as context switches and page fault etc., and MPI
related activities are traced, with minimal overhead to
the application program, thus presenting application
programmers the trace of all the events that ultimately
affect efficiency of a parallel program.",
acknowledgement = ack-nhfb,
keywords = "Parallel programming (Computer science)",
}
@InProceedings{Franke:1995:MPEb,
author = "Hubertus Franke and C. Eric Wu and Michel Riviere and
Pratap Pattnaik and Marc Snir",
title = "{MPI} Programming Environment for {IBM SP1\slash
SP2}",
crossref = "IEEE:1995:PIC",
pages = "127--135",
year = "1995",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "Compendex database;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog number 95CH35784.",
abstract = "In this paper we discuss an implementation of the
Message Passing Interface standard (MPI) for the IBM
Scalable Power PARALLEL 1 and 2 (SP1, SP2). Key to a
reliable and efficient implementation of a message
passing library on these machines is the careful design
of a UNIX-Socket like layer in the user space with
controlled access to the communication adapters and
with adequate recovery and flow control. The
performance of this implementation is at the same level
as the IBM-proprietary message passing library (MPL).
We also show that in the IBM SP1 and SP2 we achieve
integrated tracing ability, where both system events,
such as context switches and page fault etc., and MPI
related activities are traced, with minimal overhead to
the application program, thus presenting application
programmers the trace of all the events that ultimately
affect efficiency of a parallel program.",
acknowledgement = ack-nhfb,
affiliation = "IBM T. J. Watson Research Cent",
affiliationaddress = "Yorktown Heights, NY, USA",
classification = "722.2; 722.4; 723; 723.1; 723.1.1; C5440
(Multiprocessing systems); C6110P (Parallel
programming); C6115 (Programming support); C6150N
(Distributed systems software)",
conference = "Proceedings of the 15th International Conference on
Distributed Computing Systems",
conftitle = "Proceedings of 15th International Conference on
Distributed Computing Systems",
corpsource = "IBM Thomas J. Watson Res. Center, Yorktown Heights,
NY, USA",
journalabr = "Proc Int Conf Distrib Comput Syst",
keywords = "adequate recovery; Adequate recovery; application
program interfaces; application programmers;
Application programmers; Application programming
interfaces; communication adapters; Communication
adapters; Computer architecture; Computer programming;
Computer software; Computer system recovery; Fault
tolerant computer systems; flow control; Flow control;
IBM Scalable Power PARALLEL 1; IBM SP1/SP2; integrated
tracing ability; Integrated tracing ability; Interfaces
(computer); message passing; Message passing interface
standard; message passing interface standard; Message
passing interface standard; Message passing library;
MPI programming environment; page fault; Page fault;
Parallel processing systems; parallel program; Parallel
program; parallel programming; Performance; Power
parallel system; programming environments; Software
engineering; Systems analysis; UNIX; UNIX-Socket like
layer; UNIX-Socket like layer",
meetingaddress = "Vancouver, Can",
meetingdate = "May 30--Jun 2 1995",
meetingdate2 = "05/30--06/02/95",
sponsor = "IEEE Computer Society",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed
Process",
thesaurus = "Application program interfaces; Message passing;
Parallel programming; Programming environments",
treatment = "A Application; P Practical",
}
@InProceedings{Ge:1995:DHA,
author = "Yuzhen Ge and L. T. Watson and E. G. {Collins, Jr.}",
title = "Distributed homotopy algorithms for {$ H^2 / H^\infty
$} controller synthesis",
crossref = "Bailey:1995:PSS",
pages = "84--89",
year = "1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Math. and Comput. Sci., Butler Univ.,
Indianapolis, IN, USA",
classification = "C1310 (Control system analysis and synthesis
methods); C3220 (Controllers); C4240P (Parallel
programming and algorithm theory)",
keywords = "Distributed homotopy algorithms; $ H^2 / H^\infty $
controller synthesis; High performance
computation; Industrial design environment; Jacobian
matrix computation; Mixed-norm controller synthesis
problem; Parallel Virtual Machine; UNIX workstations",
thesaurus = "Control system synthesis; Controllers; Distributed
algorithms",
}
@InProceedings{Gentzsch:1995:STP,
author = "W. Gentzsch and U. Block and F. Ferstl",
title = "Software tools for parallel computers and workstation
clusters",
crossref = "Ferenczi:1995:PAH",
pages = "23--42",
year = "1995",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "GENIAS Software GmbH, Neutraubling, Germany",
classification = "C5220P (Parallel architecture); C5430
(Microcomputers); C5440 (Multiprocessing systems);
C5540 (Terminals and graphic displays); C6115
(Programming support)",
keywords = "Benchmark results; EXPRESS; FORGE 90; GENIAS; Intel
iPSC/860; NCUBE/2; Parallel codes; Parallel computers;
Parsytec Multicluster; PVM/MPI; Software tools;
Workstation clusters; XHPF",
thesaurus = "Parallel processing; Software tools; Workstations",
}
@InProceedings{Gianuzzi:1995:UPI,
author = "V. Gianuzzi and F. Merani",
title = "Using {PVM} to Implement a Distributed Dependable
Simulation System",
crossref = "IEEE:1995:PEW",
pages = "529--535",
year = "1995",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dipartimento di Inf. e Sci. dell'Inf., Genoa Univ.,
Italy",
classification = "C4240P (Parallel programming and algorithm theory);
C5470 (Performance evaluation and testing); C6150N
(Distributed systems software)",
corpsource = "Dipartimento di Inf. e Sci. dell'Inf., Genoa Univ.,
Italy",
keywords = "algorithms; checkpoint-restart mechanism;
Checkpoint-restart mechanism; distributed; distributed
algorithms; Distributed algorithms; distributed
dependable simulation system; Distributed dependable
simulation system; fault tolerant; fault tolerant
computing; Fault tolerant mechanisms; high speed; High
speed interconnection; interconnection; mechanisms;
message; passing; PVM routines; simulations modelling;
Simulations modelling; synchronisation; Virtual Time",
sponsororg = "Euromicro; Assoc. Italiana per Inf. Calcolo Autom",
thesaurus = "Distributed algorithms; Fault tolerant computing;
Message passing; Synchronisation",
treatment = "P Practical",
}
@InProceedings{Gillich:1995:FPP,
author = "S. Gillich and B. Ries",
title = "Flexible, portable performance analysis for {PARMACS}
and {MPI}",
crossref = "Hertzberger:1995:HPM",
pages = "937--??",
year = "1995",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Greenfield:1995:OPS,
author = "J. Greenfield",
title = "An Overview of the {PVM} Software System",
crossref = "IEEE:1995:ISE",
pages = "17--23",
year = "1995",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5220P (Parallel architecture); C5440
(Multiprocessing systems); C6150N (Distributed systems
software)",
corpsource = "Dept. of Electr. and Comput. Eng., New Mexico Univ.,
Albuquerque, NM, USA",
keywords = "analysis; debugging; machine; message passing;
parallel processing; Parallel Virtual Machine;
performance; PVM; software system; virtual; virtual
machines; visualization tools",
treatment = "P Practical",
}
@InProceedings{Gropp:1995:DPM,
author = "W. Gropp and E. Lusk",
title = "Dynamic process management in an {MPI} setting",
crossref = "IEEE:1995:PSI",
pages = "530--533",
year = "1995",
CODEN = "PSPDF8",
ISSN = "1063-6374",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "Compendex database;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog number 95TB8131.",
abstract = "We describe an architecture for the runtime
environment for parallel applications as prelude to
describing how parallel applications might interface to
their environment in a portable way. We propose
extensions to the Message-Passing Interface (MPI)
Standard that provide for dynamic process management,
including spawning of new processes by a running
application and connection to existing processes to
support client\slash server applications. Such
extensions are needed if more of the runtime
environment for parallel programs is to be accessible
to MPI programs or to be themselves written using MPI.
The extensions proposed here are motivated by real
applications and fit cleanly with existing concepts of
MPI. No changes to the existing MPI Standard are
proposed, thus all present MPI programs will run
unchanged.",
acknowledgement = ack-nhfb,
affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab.",
affiliationaddress = "Argonne, IL, USA",
classification = "722.2; 722.3; 722.4; 723.1; 902.2; C5220P (Parallel
architecture); C5440 (Multiprocessing systems); C6150N
(Distributed systems software)",
conference = "Proceedings of the 1995 7th IEEE Symposium on Parallel
and Distributed Processing",
conftitle = "Proceedings of Seventh IEEE Symposium on Parallel and
Distributed Processing",
corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
journalabr = "IEEE Symp Parallel Distrib Process Proc",
keywords = "Client/server applications; Computer architecture;
Computer networks; Computer software; Computer systems
programming; Computer workstations; Data communication
systems; dynamic process management; Dynamic process
management; Interfaces (computer); message passing;
Message-passing interface; MPI setting; parallel
applications; Parallel applications; parallel
processing; Parallel processing systems; Parallel
programs; Process control; process management; Process
management; Real time systems; Resource allocation;
runtime environment; Runtime environment; Runtime
environments; Scheduling; Standards",
meetingaddress = "San Antonio, TX, USA",
meetingdate = "Oct 25--28 1995",
meetingdate2 = "10/25--28/95",
sponsor = "IEEE",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Comput.
Architecture; IEEE Comput. Soc. Tech. Committee on
Distributed Process.; IEEE Comput. Soc. Dallas
Chapter",
thesaurus = "Message passing; Parallel processing",
treatment = "P Practical",
}
@Article{Gropp:1995:EIS,
author = "W. D. Gropp and E. Lusk",
title = "Experiences with the {IBM SP1}",
journal = j-IBM-SYS-J,
volume = "34",
number = "2",
pages = "249--262",
year = "1995",
CODEN = "IBMSA7",
ISSN = "0018-8670",
bibdate = "Tue Mar 19 17:38:46 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.research.ibm.com/journal/sj34-2.html#seven",
abstract = "One of the first IBM parallel processing
computers---the SP1*---and the largest, with 128 nodes,
was installed in 1993 at Argonne National Laboratory.
It took only days, not months, to prepare for and
migrate applications to this parallel supercomputer,
demonstrating that high performance, parallelism, and
portability can coexist. This paper describes the early
experiences with the SP1 at Argonne, which provide
lessons for supercomputer system designers and users
alike. We explore what features of software technology
and system architecture enabled immediate and
successful use of the SP1. The paper concludes with a
brief indication of why the move to the SP2* software
environment using the SP2 communication adapters, the
use of the emerging Message-Passing Interface standard,
and the continued use of the SP1 processors have been
successful.",
acknowledgement = ack-nhfb,
affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
classification = "C5220P (Parallel architecture); C5440
(Multiprocessing systems); C6110P (Parallel
programming); C6150N (Distributed systems software)",
fjournal = "IBM Systems Journal",
keywords = "High performance; IBM parallel processing computers;
IBM SP1; Message passing interface standard; Parallel
supercomputer; Parallelism; Portability; Software
technology; SP1 processors; SP2 communication adapters;
SP2 software environment; Supercomputer system
designers; System architecture",
language = "English",
pubcountry = "USA",
thesaurus = "IBM computers; Message passing; Parallel
architectures; Parallel machines; Parallel
programming",
}
@InProceedings{Gropp:1995:IMM,
author = "W. Gropp and E. Lusk",
title = "Implementing {MPI}: the 1994 {MPI Implementors'
Workshop}",
crossref = "IEEE:1995:PSP",
pages = "55--59",
year = "1995",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
classification = "C6110B (Software engineering techniques); C6110P
(Parallel programming); C6150N (Distributed systems
software)",
conftitle = "Proceedings Scalable Parallel Libraries Conference",
corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
keywords = "implementation process; Implementation process;
message passing; MPI implementation effort; parallel
computing; Parallel computing; parallel library;
Parallel library; parallel programming; software
libraries; software standards; standard message-passing
library interface; Standard message-passing library
interface; subroutines",
sponsororg = "Mississippi State Univ.; NSF",
thesaurus = "Message passing; Parallel programming; Software
libraries; Software standards; Subroutines",
treatment = "P Practical",
}
@InProceedings{Gropp:1995:MGX,
author = "W. Gropp and E. Karrels and E. Lusk",
title = "{MPE} graphics --- scalable {X11} graphics in {MPI}",
crossref = "IEEE:1995:PSP",
pages = "49--54",
year = "1995",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
classification = "C6110P (Parallel programming); C6130B (Graphics
techniques); C6150N (Distributed systems software)",
conftitle = "Proceedings Scalable Parallel Libraries Conference",
corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
keywords = "communication patterns; Communication patterns;
communication traffic; Communication traffic; computer
graphics; library based message passing; Library based
message passing; message passing; MPE graphics; MPI;
MPI implementation; MPI message passing standard; MPI
Standard; parallel graphics library; Parallel graphics
library; parallel graphics operations; Parallel
graphics operations; parallel graphics routines;
Parallel graphics routines; parallel programming;
parallel programs; Parallel programs; parallel
semantics; Parallel semantics; programming libraries;
Programming libraries; scalable X11 graphics; Scalable
X11 graphics; semantics; Semantics; software standards;
subroutines; user control; User control; X-based
parallel graphics library",
sponsororg = "Mississippi State Univ.; NSF",
thesaurus = "Computer graphics; Message passing; Parallel
programming; Software standards; Subroutines",
treatment = "P Practical",
}
@InProceedings{Gropp:1995:MMI,
author = "W. Gropp and E. Lusk",
title = "The {MPI} Message-Passing Interface Standard: Overview
and Status",
crossref = "Dongarra:1995:HPC",
pages = "265--270",
year = "1995",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Guan:1995:SCC,
author = "Xiaojun Guan and Richard J. Mural and Edward C.
Uberbacher",
title = "Sequence comparison on a cluster of workstations using
the {PVM} system",
crossref = "IEEE:1995:PIP",
pages = "190--195",
year = "1995",
CODEN = "PSPDF8",
DOI = "https://doi.org/10.1109/IPPS.1995.395931",
ISSN = "1063-6374",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "Compendex database;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "Sequence comparison is one of the most important tools
in molecular biology research. As the amount of DNA
data increases rapidly, efficient sequence comparison
algorithms are essential in studying newly discovered
sequences. We have implemented a distributed sequence
comparison algorithm by Smith and Waterman on a cluster
of workstations using the PVM paradigm. This
implementation has achieved similar performance to the
Intel iPSC\slash 860 Hypercube, a massively parallel
computer. The distributed Smith-Waterman algorithm
serves as a search tool for two Internet servers GRAIL
and GENQUEST. This paper describes the implementation
and the performance of the algorithm.",
acknowledgement = ack-nhfb,
affiliation = "Oak Ridge Natl Lab",
affiliationaddress = "Oak Ridge, TN, USA",
classification = "461.2; 721.1; 722.4; 723.1; C5220P (Parallel
architecture); C5440 (Multiprocessing systems); C5620W
(Other computer networks); C7330 (Biology and medical
computing)",
conference = "Proceedings of the IEEE 9th International Parallel
Processing Symposium",
corpsource = "Div. of Comput. Sci. and Math., Oak Ridge Nat. Lab.,
TN, USA",
journalabr = "IEEE Symp Parallel Distrib Process Proc",
keywords = "Algorithms; cluster of workstations; Cluster of
workstations; Computational complexity; Computer
software; Computer workstations; DNA; DNA data; DNA
sequences; GENQUEST; GRAIL; hypercube; hypercube
networks; Intel iPSC/860; Intel iPSC/860 hypercube;
Internet; Internet servers; Internet servers GRAIL;
massively parallel computer; Massively parallel
computer; medical computing; molecular biology
research; Molecular biology research; molecular
biophysics; Parallel processing systems; Parallel
virtual machine; performance; Performance; PVM system;
sequence comparison; Sequence comparison; Smith
Waterman algorithm",
meetingaddress = "Santa Barbara, CA, USA",
meetingdate = "Apr 25--28 1995",
meetingdate2 = "04/25--28/95",
sponsor = "IEEE",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Parallel
Process",
thesaurus = "DNA; Hypercube networks; Internet; Medical computing;
Molecular biophysics",
treatment = "A Application; P Practical",
}
@InProceedings{Guarracino:1995:PMB,
author = "M. R. Guarracino and F. Perla",
title = "A parallel modified block {Lanczos} algorithm for
distributed memory architectures",
crossref = "IEEE:1995:PEW",
pages = "424--431",
year = "1995",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dipartimento di Matematica e Applicazioni, Naples
Univ., Italy",
classification = "C4140 (Linear algebra); C4240P (Parallel programming
and algorithm theory); C6110P (Parallel programming)",
keywords = "Block column wrap-around matrices; Block Lanczos
algorithm; Distributed memory architectures;
Eigenproblems; Load-balancing; Parallel block Lanczos
algorithm; Parallel software",
thesaurus = "Distributed memory systems; Eigenvalues and
eigenfunctions; Matrix algebra; Parallel algorithms",
}
@InProceedings{Hardwick:1995:PVL,
author = "J. C. Hardwick",
title = "Porting a vector library: a comparison of {MPI},
{Paris}, {CMMD} and {PVM}",
crossref = "IEEE:1995:PSP",
pages = "68--77",
year = "1995",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Sch. of Comput. Sci., Carnegie Mellon Univ.,
Pittsburgh, PA, USA",
classification = "C6110B (Software engineering techniques); C6110P
(Parallel programming); C6115 (Programming support);
C6140D (High level languages)",
conftitle = "Proceedings Scalable Parallel Libraries Conference",
corpsource = "Sch. of Comput. Sci., Carnegie Mellon Univ.,
Pittsburgh, PA, USA",
keywords = "CM-2; CM-5; CMMD; compiler target; Compiler target;
Cray C90; debugging; Debugging; message passing; MPI;
NESL; nested data-parallel languages; Nested
data-parallel languages; parallel; parallel languages;
parallel programming; parallel vector library CVL;
Parallel vector library CVL; Paris; portable MPI
implementation; Portable MPI implementation; Proteus;
PVM; RISC based MPP architectures; software libraries;
subroutines; vector library CVL; vector library
porting; Vector library porting; vector processor
systems",
sponsororg = "Mississippi State Univ.; NSF",
thesaurus = "Message passing; Parallel languages; Parallel
programming; Software libraries; Subroutines; Vector
processor systems",
treatment = "P Practical",
}
@InProceedings{Hariri:1995:STE,
author = "S. Hariri and Sung-Yong Park and R. Reddy and M.
Subramanyan and R. Yadav and G. C. Fox and M.
Parashar",
title = "Software tool evaluation methodology",
crossref = "IEEE:1995:PIC",
pages = "3--10",
year = "1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Northeast Parallel Archit. Center, Syracuse Univ., NY,
USA",
classification = "C6110P (Parallel programming); C6115 (Programming
support); C6150N (Distributed systems software)",
keywords = "Alpha cluster; ATM; Distributed computing software;
Distributed systems platforms; Ethernet; Express; FDDI;
IBM-SP1; Message passing tools; Multi-level evaluation
methodology; P4; Parallel computing software;
Programming paradigms; PVM; Software tool evaluation
methodology; SUN workstations",
thesaurus = "Message passing; Parallel programming; Software
performance evaluation; Software tools",
}
@InProceedings{Hausner:1995:EIP,
author = "M. Hausner and M. Burrows and C. A. Thekkath",
title = "Efficient implementation of {PVM} on the {AN2 ATM}
network",
crossref = "Hertzberger:1995:HPM",
pages = "562--569",
year = "1995",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Inst. f{\"u}r Computersyst., Eidgen{\"o}ssische Tech.
Hochschule, Zurich, Switzerland",
classification = "B6150C (Communication switching); B6210L (Computer
communications); C5620L (Local area networks); C6115
(Programming support); C6150N (Distributed systems
software)",
corpsource = "Inst. f{\"u}r Computersyst., Eidgen{\"o}ssische Tech.
Hochschule, Zurich, Switzerland",
keywords = "Alpha workstations; AN2 ATM network; asynchronous
transfer mode; ATM link bandwidth; coarse-grained;
Coarse-grained multicomputer; end-to-end PVM
communication performance; End-to-end PVM communication
performance; environments; high-speed ATM network;
High-speed ATM network; high-speed network; High-speed
network; local area networks; multicomputer;
programming; PVM environment; workstation cluster;
Workstation cluster; workstations",
pubcountry = "Germany",
thesaurus = "Asynchronous transfer mode; Local area networks;
Programming environments; Workstations",
treatment = "P Practical",
}
@InProceedings{Hoekstra:1995:CPP,
  author =        "A. G. Hoekstra and F. {Van der Linden} and P. M. A.
                  Sloot and L. O. Hertzberger",
  title =         "Comparing the {Parix} and {PVM} parallel programming
                  environments",
  crossref =      "Fritzson:1995:PPA",
  pages =         "288--292",
  year =          "1995",
  bibdate =       "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440
                  (Multiprocessing systems); C6110B (Software engineering
                  techniques); C6110P (Parallel programming); C6115
                  (Programming support); C6150N (Distributed systems
                  software)",
  corpsource =    "Parallel Sci. Comput. and Simulation Group, Amsterdam
                  Univ., Netherlands",
  keywords =      "communication capabilities; complexity analysis;
                  computational complexity; development; environments;
                  floating; floating point arithmetic; functionality;
                  generic; global communication times; native parallel
                  programming environments; parallel architectures;
                  parallel programming; parallel programming
                  environments; Parix parallel; Parsytec GCel; Parsytec
                  PowerXplorer; performance; performance penalties; point
                  communication times; point performance; point to;
                  portability; portable parallel program; PowerPC chip;
                  programmability; programming; programming environments;
                  PVM parallel programming environments; software;
                  software performance evaluation; software tools;
                  support; time; tool; transputer systems",
  pubcountry =    "Netherlands",
  treatment =     "P Practical",
  xxauthor =      "A. G. Hoekstra and P. M. A. Sloot and L. O.
                  Hertzberger",
  xxcrossref =    "VanKatwijk:1995:AAC",
}
@Article{Hollerbach:1995:FDA,
  author =        "Rainer Hollerbach",
  title =         "Fast dynamo action in spherical geometry: Numerical
                  calculations using parallel virtual machines",
  journal =       j-COMPUT-PHYS,
  volume =        "9",
  number =        "4",
  pages =         "460--??",
  month =         jul,
  year =          "1995",
  CODEN =         "CPHYE2",
  DOI =           "https://doi.org/10.1063/1.168547",
  ISSN =          "0894-1866 (print), 1558-4208 (electronic)",
  ISSN-L =        "0894-1866",
  bibdate =       "Wed Apr 10 08:45:55 MDT 2019",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/computphys.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                  http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =           "https://aip.scitation.org/doi/10.1063/1.168547",
  acknowledgement = ack-nhfb,
  ajournal =      "Comput. Phys",
  fjournal =      "Computers in Physics",
  journal-URL =   "https://aip.scitation.org/journal/cip",
}
@InProceedings{Hondroudakis:1995:PEV,
  author =        "A. Hondroudakis and R. Procter and K. Shanmugam",
  title =         "Performance evaluation and visualization with
                  {VISPAT}",
  crossref =      "Malyshkin:1995:PCT",
  pages =         "180--185",
  year =          "1995",
  bibdate =       "Sun Dec 22 10:19:23 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "Dept. of Comput. Sci., Edinburgh Univ., UK",
  classification = "C6110P (Parallel programming); C6110V (Visual
                  programming); C6115 (Programming support)",
  keywords =      "Graphical front end; Message passing; MPI; Parallel
                  programs; Performance analysis; Program execution;
                  VISPAT; Visualization",
  thesaurus =     "Data visualisation; Parallel programming; Software
                  performance evaluation; Software tools; Visual
                  programming",
}
@Article{Hong:1995:PNP,
  author =        "Lin Hong and Chen Huaping",
  title =         "{PVM} and network parallel computing",
  journal =       j-MINI-MICRO-SYSTEMS,
  volume =        "16",
  number =        "2",
  pages =         "53--58",
  month =         feb,
  year =          "1995",
  CODEN =         "XWJXEH",
  ISSN =          "1000-1220",
  bibdate =       "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "Dept. of Comput. Sci. and Technol., Univ. of Sci. and
                  Technol. of China, Hefei, China",
  classification = "C6150N (Distributed systems software)",
  corpsource =    "Dept. of Comput. Sci. and Technol., Univ. of Sci. and
                  Technol. of China, Hefei, China",
  fjournal =      "Mini-Micro Systems",
  keywords =      "computing model; Computing model; load balancing; Load
                  balancing; message passing; Message passing; network
                  parallel computing; Network parallel computing;
                  parallel granularity; Parallel granularity; parallel
                  processing; programming methodology; Programming
                  methodology; PVM; resource allocation; software
                  environment; Software environment; virtual machines",
  language =      "Chinese",
  pubcountry =    "China",
  thesaurus =     "Message passing; Parallel processing; Resource
                  allocation; Virtual machines",
  treatment =     "P Practical",
}
%%% NOTE: the database export fused two index terms as "Speedup, Parallel
%%% processing systems"; they are stored below as separate keywords.
@InProceedings{Hui:1995:SPS,
  author =        "Chi-Chung Hui and Mounir Hamdi and Ishfaq Ahmad",
  title =         "Software platform for solving {PDEs} on distributed
                  systems: Implementation issues and performance
                  prediction",
  crossref =      "IEEE:1995:PNA",
  pages =         "383--388",
  year =          "1995",
  CODEN =         "PSICD2",
  ISSN =          "0730-6512",
  bibdate =       "Fri May 24 09:58:00 MDT 1996",
  bibsource =     "Compendex database;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =          "IEEE catalog number 95CB35838.",
  abstract =      "This paper describes the implementation and
                  performance of a parallel platform for solving partial
                  differential equations (PDEs) on distributed systems.
                  The platform has been implemented using PVM for a
                  network of workstations. It allows the inclusion of a
                  wide range of parameters and programming aids. The PDEs
                  are specified in the form of finite difference
                  equations. With a given set of parameters and a
                  partitioning strategy, the platform provides facilities
                  to record and predict the performance of an application
                  before running it. The performance prediction model
                  helps the user to identify the major bottlenecks of the
                  platform such that by reducing them, the speedup can be
                  improved. We also present analysis of various factors
                  that can have drastic effect on the speedup, which
                  allows the user to tune a number of parameters to
                  maximize the performance.",
  acknowledgement = ack-nhfb,
  affiliation =   "Hong Kong Univ of Science and Technology",
  affiliationaddress = "Kowloon, Hong Kong",
  classification = "722.2; 722.3; 722.4; 723.1; 921.2; 921.6; C4170
                  (Differential equations); C5620L (Local area networks);
                  C6150N (Distributed systems software)",
  conference =    "Proceedings of the 19th Annual International Computer
                  Software and Applications Conference COMPSAC '95",
  journalabr =    "Proc IEEE Comput Soc Int Comput Software Appl Conf",
  keywords =      "Application; Asynchronous communication library
                  routines; Bottlenecks; Computer software; Computer
                  workstations; Data communication systems; Distributed
                  systems; Finite difference equations; Finite difference
                  method; Mathematical models; Parallel platform;
                  Parallel processing systems; Parallel virtual machine
                  (PVM) system; Partial differential equation solving;
                  Partial differential equations; Partitioning strategy;
                  Performance prediction; Performance recording;
                  Programming aids; PVM; Software platform; Speedup;
                  Workstation network",
  meetingaddress = "Dallas, TX, USA",
  meetingdate =   "Aug 9--11 1995",
  meetingdate2 =  "08/09--11/95",
  sponsor =       "IEEE",
  thesaurus =     "Finite difference methods; Local area networks;
                  Parallel processing; Partial differential equations;
                  Software performance evaluation; Workstations",
}
@MastersThesis{Humphres:1995:LBE,
  author =        "Christopher Wade Humphres",
  title =         "A load balancing extension for the {PVM} software
                  system",
  type =          "M.E.E. thesis",
  school =        inst-UAL-EE,
  address =       inst-UAL-EE:adr,
  pages =         "viii + 98",
  year =          "1995",
  bibdate =       "Mon Jan 15 16:50:57 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  keywords =      "Computer networks; Parallel computers.",
}
@Article{Hungenahally:1995:PIQ,
  author =        "A. Hungenahally and A. Suresh",
  title =         "{PVM} implementation of quadtree building algorithms
                  on {SIMD} hypercube system",
  journal =       j-IEEE-INT-CONF-ALG-ARCH-PAR-PROC,
  volume =        "2",
  pages =         "855--858",
  month =         "????",
  year =          "1995",
  bibdate =       "Fri May 24 09:58:00 MDT 1996",
  bibsource =     "Compendex database;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =          "IEEE catalog number 95TH0682-5.",
  abstract =      "Representation of Data using hierarchical data
                  structures is commonly used in applications such as
                  Computer graphics, Digital image processing, Computer
                  Vision and techniques are being evolved for efficient
                  representation of these data. Transforming bilevel
                  images to linear quadtrees is a way of representing the
                  high-volume data. In this paper, the preliminary
                  investigation and results thus obtained for
                  transforming binary images to linear quadtrees using
                  Parallel Virtual Machine System Software are presented.
                  Single Instruction Multiple Data hypercube algorithms
                  implemented using PVM software was tested under DOS
                  operating system on IBM compatible PCs. The quadtree
                  algorithm generates locational codes in pre-order and
                  generally runs in O(log n) time and this paper tested
                  the feasibility of achieving this time for an SIMD
                  machine.",
  acknowledgement = ack-nhfb,
  affiliation =   "Griffith Univ",
  affiliationaddress = "Brisbane, Aust",
  classification = "722.4; 723; 723.2",
  conference =    "Proceedings of the IEEE 1st International Conference
                  on Algorithms and Architectures for Parallel
                  Processing. Part 2 (of 2)",
  fjournal =      "IEEE International Conference on Algorithms and
                  Architectures for Parallel Processing",
  journalabr =    "IEEE Int Conf Algorithms Archit Parall Process",
  keywords =      "Codes (symbols); Computer software; Data structures;
                  DOS; Hierarchical data structures; Hypercube; Image
                  processing; Parallel algorithms; Parallel processing
                  systems; Parallel virtual machine; Personal computers;
                  Quadtree; Single instruction multiple data",
  meetingaddress = "Brisbane, Aust",
  meetingdate =   "Apr 19--21 1995",
  meetingdate2 =  "04/19--21/95",
  sponsor =       "IEEE",
}
@Article{Ingle:1995:MAS,
  author =        "N. K. Ingle and T. J. Mountziaris",
  title =         "A multifrontal algorithm for the solution of large
                  systems of equations using network-based parallel
                  computing",
  journal =       j-COMP-CHEM-ENG,
  volume =        "19",
  number =        "6--7",
  pages =         "671--681",
  month =         jun # "--" # jul,
  year =          "1995",
  CODEN =         "CCENDW",
  ISSN =          "0098-1354",
  ISSN-L =        "0098-1354",
  bibdate =       "Sun Dec 22 10:20:45 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "Dept. of Chem. Eng., State Univ. of New York, Buffalo,
                  NY, USA",
  classification = "C4140 (Linear algebra); C4160 (Numerical integration
                  and differentiation); C4170 (Differential equations);
                  C4185 (Finite element analysis); C4240P (Parallel
                  programming and algorithm theory); C7320 (Physics and
                  chemistry computing)",
  fjournal =      "Computers \& Chemical Engineering",
  keywords =      "Chemical vapor deposition; Distributed computing
                  environment; Finite element analysis; Flow;
                  Granularity; Heat transfer problem; In-core
                  computations; Intrinsic fault tolerance capabilities;
                  Large sparse equation systems; Multifrontal algorithm;
                  Network-based parallel computing; Networked
                  workstations; Out-of-core computations; Parallel
                  Virtual Machine software; Performance; Processors;
                  Reaction processes; Speedups; Thin films; Transport
                  processes",
  pubcountry =    "UK",
  thesaurus =     "Chemical reactions; Chemical vapour deposition;
                  Chemically reactive flow; Chemistry computing;
                  Differential equations; Finite element analysis; Heat
                  transfer; Integration; Parallel algorithms; Physics
                  computing; Software fault tolerance; Sparse matrices;
                  Thin films; Transport processes; Workstations",
}
@TechReport{Jann:1995:AMP,
  author =        "Joefon Jann and Hubertus Franke",
  title =         "Analysis of an {MPI} program using {UTE} on the {IBM
                  SP2}",
  type =          "Research report",
  number =        "RC 20085 (88832)",
  institution =   inst-IBM-WATSON,
  address =       inst-IBM-WATSON:adr,
  pages =         "11",
  year =          "1995",
  bibdate =       "Mon Jan 15 15:32:53 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =      "We describe an implementation of a 2D-FFT
                  (Complex-Complex) program in MPI-F on the SP2 and show
                  its actual performance. The purpose of this paper is to
                  illustrate how we use the new tracing utility UTE/MPI
                  provided in MPI-F to verify the correctness of our
                  algorithm, to provide timing statistics summaries, and
                  to unravel other system activities, often unexpected by
                  the user, that affect the total elapsed time of the
                  program.",
  acknowledgement = ack-nhfb,
  keywords =      "Multiprocessors",
}
@Article{Jeremiassen:1995:RFS,
  author =        "T. E. Jeremiassen and S. J. Eggers",
  title =         "Reducing false sharing on shared memory
                  multiprocessors through compile time data
                  transformations",
  journal =       j-SIGPLAN,
  volume =        "30",
  number =        "8",
  pages =         "179--188",
  month =         aug,
  year =          "1995",
  CODEN =         "SINODQ",
  ISSN =          "0362-1340 (print), 1523-2867 (print), 1558-1160
                  (electronic)",
  ISSN-L =        "0362-1340",
  bibdate =       "Sat Sep 7 07:51:54 MDT 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =      "We have developed compiler algorithms that analyze
                  explicitly parallel programs and restructure their
                  shared data to reduce the number of false sharing
                  misses. The algorithms analyze per-process shared data
                  accesses, pinpoint the data structures that are
                  susceptible to false sharing and choose an appropriate
                  transformation to reduce it. The transformations either
                  group data that is accessed by the same processor or
                  separate individual data items that are shared. This
                  paper evaluates that technique. We show through
                  simulation that our analysis successfully identifies
                  the data structures that are responsible for most false
                  sharing misses, and then transforms them without unduly
                  decreasing spatial locality. The reduction in false
                  sharing positively impacts both execution time and
                  program scalability when executed on a KSR2. Both
                  factors combine to increase the maximum achievable
                  speedup for all programs, more than doubling it for
                  several. Despite being able to only approximate actual
                  inter-processor memory accesses, the compiler-directed
                  transformations always outperform programmer efforts to
                  eliminate false sharing.",
  acknowledgement = ack-nhfb,
  affiliation =   "AT and T Bell Labs., Murray Hill, NJ, USA",
  classification = "C6120 (File organisation); C6150C (Compilers,
                  interpreters and other processors); C6150N (Distributed
                  systems software)",
  fjournal =      "ACM SIG{\-}PLAN Notices",
  journal-URL =   "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords =      "Compile time data transformations; Compiler
                  algorithms; Data structures; Execution time; False
                  sharing; False sharing misses; Inter-processor memory
                  access; KSR2; Maximum achievable speedup; Parallelizing
                  compilers; Program scalability; Shared data access;
                  Shared memory multiprocessors; Simulation; Spatial
                  locality",
  thesaurus =     "Data structures; Parallel programming; Program
                  compilers; Shared memory systems; Virtual machines",
}
@Article{Jin:1995:LTP,
  author =        "Lan Jin and Lan Yang",
  title =         "A laboratory for teaching parallel computing on
                  parallel structures",
  journal =       j-SIGCSE,
  volume =        "27",
  number =        "1",
  pages =         "71--75",
  month =         mar,
  year =          "1995",
  CODEN =         "SIGSD3",
  ISSN =          "0097-8418 (print), 2331-3927 (electronic)",
  bibdate =       "Sun Dec 22 10:20:45 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "Dept. of Comput. Sci., California State Univ., Fresno,
                  CA, USA",
  classification = "C0220 (Computing education and training); C5220P
                  (Parallel architecture); C6110P (Parallel programming);
                  C6150N (Distributed systems software); C7430 (Computer
                  engineering)",
  fjournal =      "SIGCSE Bulletin (ACM Special Interest Group on
                  Computer Science Education)",
  journal-URL =   "http://portal.acm.org/browse_dl.cfm?idx=J688",
  keywords =      "Hardware level; Message-passing programming teaching;
                  Multi-computer; Parallel computing teaching laboratory;
                  Parallel processing; Parallel structure; Parallel
                  systems; Parallel Virtual Machine; PVM;
                  Reconfiguration; Software level; Structural
                  implementation",
  thesaurus =     "Computer science education; Laboratories; Message
                  passing; Parallel machines; Parallel programming;
                  Reconfigurable architectures; Teaching",
}
@InProceedings{Juric:1995:UPV,
  author =        "M. Juric and W. D. Potter and M. Plaksin",
  title =         "Using the {Parallel Virtual Machine} for hunting
                  snake-in-the-box codes",
  crossref =      "Arabnia:1995:TRA",
  pages =         "97--102",
  year =          "1995",
  bibdate =       "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "Dept. of Comput. Sci. and Inf. Syst., DePaul Univ.,
                  Chicago, IL, USA",
  classification = "C1180 (Optimisation techniques); C4230M
                  (Multiprocessor interconnection); C6150E (General
                  utility programs); C6150G (Diagnostic, testing,
                  debugging and evaluating systems); C6150N (Distributed
                  systems software)",
  corpsource =    "Dept. of Comput. Sci. and Inf. Syst., DePaul Univ.,
                  Chicago, IL, USA",
  keywords =      "adapted code; Adapted code; algorithm; combinatorial
                  explosion; Combinatorial explosion; genetic; Genetic
                  algorithm; genetic algorithms; hypercube networks;
                  maximum length snake; Maximum length snake;
                  multiprocessing; parallel; parallel machines; Parallel
                  single processor machine cluster; Parallel Virtual
                  Machine; programs; PVM software package; single
                  processor machine cluster; snake-in-the-box code
                  hunting; Snake-in-the-box code hunting; system
                  monitoring; systems; transputer; utility programs;
                  virtual machines",
  pubcountry =    "Netherlands",
  thesaurus =     "Genetic algorithms; Hypercube networks;
                  Multiprocessing programs; Parallel machines; System
                  monitoring; Transputer systems; Utility programs;
                  Virtual machines",
  treatment =     "P Practical",
}
@InProceedings{Kalns:1995:DPD,
  author =        "E. T. Kalns and L. M. Ni",
  title =         "{DaReL}: a portable data redistribution library for
                  distributed-memory machines",
  crossref =      "IEEE:1995:PSP",
  pages =         "78--87",
  year =          "1995",
  bibdate =       "Sun Dec 22 10:20:45 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "Dept. of Comput. Sci., Michigan State Univ., East
                  Lansing, MI, USA",
  classification = "C5440 (Multiprocessing systems); C6110B (Software
                  engineering techniques); C6110P (Parallel programming);
                  C6140D (High level languages); C6150N (Distributed
                  systems software)",
  keywords =      "Algorithm computation; Algorithm phases; DaReL; Data
                  decomposition; Data exchange; Data parallel Fortran
                  languages; Distributed memory platforms;
                  Distributed-memory machines; High Performance Fortran;
                  HPF; IBM SP-1; Message passing standard; MPI
                  primitives; Multi-dimensional data redistribution;
                  Portable data redistribution library; Processor
                  memories; Program overhead; Regular distribution
                  patterns; Run-time data redistribution; Run-time data
                  redistribution primitives",
  thesaurus =     "Distributed memory systems; FORTRAN; Message passing;
                  Parallel languages; Parallel programming; Software
                  libraries; Software portability; Software standards;
                  Subroutines",
}
@InProceedings{Katkere:1995:VBW,
  author =        "A. Katkere and J. Schlenzig and R. Jain",
  title =         "{VRML-Based WWW} Interface to {MPI} Video",
  crossref =      "Nadeau:1995:SVR",
  pages =         "25--31, 137",
  month =         "????",
  year =          "1995",
  bibdate =       "Thu Mar 28 05:45:25 2002",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "Visual Comput. Lab., California Univ., San Diego, La
                  Jolla, CA, USA",
  keywords =      "SGML; Virtual Reality Modeling Language; VRML",
  xxpages =       "25--32",
}
@InProceedings{Kauranne:1995:OHM,
  author =        "T. Kauranne and J. Oinonen and S. Saarinen and O.
                  Serimaa and J. Hietaniemi",
  title =         "The operational {HIRLAM} 2 model on parallel computers
                  (weather forecasting)",
  crossref =      "Hoffmann:1995:CAP",
  pages =         "63--74",
  year =          "1995",
  bibdate =       "Sun Dec 22 10:19:23 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "Joensuu Univ., Finland",
  classification = "A9260X (Weather analysis and prediction); C4185
                  (Finite element analysis); C6110P (Parallel
                  programming); C7340 (Geophysics computing)",
  keywords =      "Atmosphere; Binary GRIB files; Distributed memory
                  computers; Helmholtz equation solver; Maintainability;
                  Message passing interface; Meteorology; Numerical
                  model; Operational HIRLAM 2 model; Parallel
                  programming; Portability; Reproducibility;
                  Semi-implicit Eulerian finite difference method; Serial
                  code noninterference; Transposition strategy; Weather
                  forecasting",
  thesaurus =     "Digital simulation; Distributed processing; Finite
                  difference methods; Finite element analysis; Geophysics
                  computing; Message passing; Numerical analysis;
                  Parallel processing; Parallel programming; Software
                  maintenance; Software portability; Weather
                  forecasting",
}
%%% NOTE(review): same authors, crossref, and pages as entry
%%% Klingebiel:1995:CPO; this looks like a second record of the same paper
%%% from a different indexing run. Confirm before merging.
@InProceedings{Klingebiel:1995:COD,
  author =        "P. Klingebiel and R. Diekmann and U. Lefarth and M.
                  Fischer and J. Seuss",
  title =         "{CAMeL\slash PVM}: an open, distributed {CAE}
                  environment for modelling and simulating mechatronic
                  systems",
  crossref =      "Breitenecker:1995:ESC",
  pages =         "645--650",
  year =          "1995",
  bibdate =       "Sun Dec 22 10:19:23 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "Mechatronics Lab., Paderborn Univ., Germany",
  classification = "C6150N (Distributed systems software); C7440 (Civil
                  and mechanical engineering computing)",
  keywords =      "Ada tasking; Automatic load balancing procedures;
                  CAMeL/PVM; Channels; Communication management;
                  Computer-aided engineering design environment;
                  Computer-Aided Mechatronic Laboratory; Computer-aided
                  modelling; Heterogeneous workstation clusters;
                  Mechatronic systems simulation; Message-passing
                  environment; Open distributed CAE environment; Parallel
                  Virtual Machine; Process management; Program modules;
                  Unix-based extension",
  thesaurus =     "Computer aided engineering; Digital simulation;
                  Mechanical engineering computing; Mechatronics; Message
                  passing; Open systems; Parallel processing; Resource
                  allocation; Unix; Virtual machines",
}
%%% NOTE(review): same authors, crossref, and pages as entry
%%% Klingebiel:1995:COD; this looks like a second record of the same paper
%%% from a different indexing run. Confirm before merging.
@InProceedings{Klingebiel:1995:CPO,
  author =        "P. Klingebiel and R. Diekmann and U. Lefarth and M.
                  Fischer and J. Seuss",
  title =         "{CAMeL\slash PVM}: An Open, Distributed {CAE}
                  Environment for Modelling and Simulating Mechatronic
                  Systems",
  crossref =      "Breitenecker:1995:ESC",
  pages =         "645--650",
  year =          "1995",
  bibdate =       "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6150N (Distributed systems software); C7440 (Civil
                  and mechanical engineering computing)",
  corpsource =    "Mechatronics Lab., Paderborn Univ., Germany",
  keywords =      "Ada tasking; aided modelling; automatic load
                  balancing; CAMeL/PVM; channels; communication; computer
                  aided engineering; computer-; Computer-Aided;
                  computer-aided engineering design environment; digital
                  simulation; engineering computing; extension;
                  heterogeneous workstation clusters; management;
                  mechanical; Mechatronic Laboratory; mechatronic systems
                  simulation; mechatronics; message passing;
                  message-passing environment; open; open distributed CAE
                  environment; Parallel; parallel processing; procedures;
                  process management; program modules; resource
                  allocation; systems; Unix; Unix-based; Virtual Machine;
                  virtual machines",
  pubcountry =    "Netherlands",
  treatment =     "P Practical",
}
%%% NOTE: the description of classification code A9365 normally contains
%%% an internal semicolon ("Data and information; acquisition, ..."). It is
%%% written without that semicolon below so that semicolon-splitting and
%%% sorting tools keep the code and its description together.
@InProceedings{Kofakis:1995:DPI,
  author =        "P. Kofakis and J. Louis",
  title =         "Distributed parallel implementation of seismic
                  algorithms",
  crossref =      "Hassanzadeh:1995:MMG",
  journal =       j-PROC-SPIE,
  volume =        "2571",
  pages =         "229--238",
  year =          "1995",
  CODEN =         "PSISDG",
  ISSN =          "0277-786X (print), 1996-756X (electronic)",
  ISSN-L =        "0277-786X",
  bibdate =       "Sun Dec 22 10:19:23 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "METHOD Ltd., Holargos, Greece",
  classification = "A9130F (Seismic waves); A9365 (Data and information
                  acquisition, processing, storage and dissemination in
                  geophysics); A9385 (Instrumentation and techniques for
                  geophysical, hydrospheric and lower atmosphere
                  research); C1180 (Optimisation techniques); C4130
                  (Interpolation and function approximation); C4170
                  (Differential equations); C5260 (Digital signal
                  processing); C6110P (Parallel programming); C6150N
                  (Distributed systems software); C7340 (Geophysics
                  computing)",
  fjournal =      "Proceedings of the SPIE --- The International Society
                  for Optical Engineering",
  keywords =      "Distributed parallel implementation; Eikonical
                  equations; Fermat's principle; Finite difference
                  extrapolation; First arrival; Heterogeneous
                  workstations; Irregular grids; Minimum time ray-tracer;
                  Parallel virtual machine; Seismic algorithms; Seismic
                  waves; Travel times",
  thesaurus =     "Distributed memory systems; Extrapolation; Finite
                  difference methods; Geophysical signal processing;
                  Local area networks; Minimisation; Parallel algorithms;
                  Ray tracing; Seismic waves",
}
@Article{Koski:1995:STL,
  author =        "Kimmo Koski",
  title =         "A step towards large scale parallelism: {Building} a
                  parallel computing environment from heterogeneous
                  resources",
  journal =       j-FUT-GEN-COMP-SYS,
  volume =        "11",
  number =        "4--5",
  pages =         "491--498",
  month =         aug,
  year =          "1995",
  CODEN =         "FGSEVI",
  ISSN =          "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =        "0167-739X",
  bibdate =       "Fri Jul 15 09:06:06 MDT 2005",
  bibsource =     "ftp://ftp.ira.uka.de/bibliography/Parallel/pvm.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                  http://www.sciencedirect.com/science/journal/0167739X",
  acknowledgement = ack-nhfb,
  affiliation =   "Centre for Sci. Comput., Espoo, Finland",
  classification = "C0200 (General computer topics); C5220P (Parallel
                  architecture); C5440 (Multiprocessing systems); C6110P
                  (Parallel programming); C6115 (Programming support);
                  C6150C (Compilers, interpreters and other processors);
                  C6150N (Distributed systems software)",
  fjournal =      "Future Generation Computer Systems",
  journal-URL =   "http://www.sciencedirect.com/science/journal/0167739X",
  keywords =      "Center for Scientific Computing; Competition; Computer
                  industry; Cray Future Generation MPP system; Efficient
                  resource use; Heterogeneous resources; IBM SP2
                  distributed memory system; Large-scale parallelism;
                  Load balancing; Massively parallel processing;
                  Metacomputing; Parallel compiler technology; Parallel
                  computing environment; Parallel programming; Parallel
                  shared memory systems; Parallel tools selection; PVM
                  clusters; RISC processors; Risks; Supported software
                  tools; User base training; Vector system vendors",
  pubcountry =    "Netherlands",
  thesaurus =     "Cray computers; DP industry; Parallel processing;
                  Parallelising compilers; Reduced instruction set
                  computing; Resource allocation; Software tools;
                  Training",
}
%%% NOTE(review): the keyword "Genetic algorithms" was previously recorded
%%% as "Generic algorithms"; it was changed to agree with the thesaurus
%%% field of this entry. Confirm against the indexing source.
@Article{Kumar:1995:MWD,
  author =        "S. Kumar and H. Adeli",
  title =         "Minimum weight design of large structures on a network
                  of workstations",
  journal =       j-MICROCOMP-CIVIL-ENG,
  volume =        "10",
  number =        "6",
  pages =         "423--432",
  month =         nov,
  year =          "1995",
  CODEN =         "MCENE7",
  ISSN =          "0885-9507",
  bibdate =       "Sun Dec 22 10:19:23 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "Dept. of Civil Eng., Ohio State Univ., Columbus, OH,
                  USA",
  classification = "C1180 (Optimisation techniques); C4240P (Parallel
                  programming and algorithm theory); C5620L (Local area
                  networks); C6150N (Distributed systems software); C7440
                  (Civil and mechanical engineering computing)",
  fjournal =      "Microcomputers in Civil Engineering",
  keywords =      "Coarse-grained applications; Computational capability;
                  Distributed algorithm; Genetic algorithms; Granularity;
                  Large structures; Local area networks; Low cost;
                  Message passing; Microprocessors; Minimum weight
                  design; Optimization; Parallel Virtual Machine;
                  Performance estimates; Software library; Structural
                  optimization; Workstation network",
  thesaurus =     "Distributed algorithms; Genetic algorithms; Local area
                  networks; Message passing; Software libraries;
                  Structural engineering computing",
}
@InProceedings{Leung:1995:EPE,
  author =        "K.-C. Leung and M. Hamdi",
  title =         "Evaluating {PVM} and {Express} on Various Network
                  Clusters",
  crossref =      "Alnuweiri:1995:PHF",
  pages =         "57--66",
  year =          "1995",
  bibdate =       "Thu Feb 29 17:59:11 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Article{Li:1995:CPP,
  author =        "Liwei Li and Paul S. Wang",
  title =         "The {CL-PVM} Package",
  journal =       j-SIGSAM,
  volume =        "29",
  number =        "3--4",
  pages =         "2--8",
  month =         dec,
  year =          "1995",
  CODEN =         "SIGSBZ",
  ISSN =          "0163-5824 (print), 1557-9492 (electronic)",
  ISSN-L =        "0163-5824",
  bibdate =       "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6150N (Distributed
                  systems software)",
  corpsource =    "Dept. of Math. and Comput. Sci., Kent State Univ., OH,
                  USA",
  fjournal =      "SIGSAM Bulletin",
  keywords =      "artificial intelligence systems; C programs; CL-PVM
                  package; Common Lisp interface; console program; expert
                  systems; Fortran 77 interface; hostfile;
                  knowledge-based systems; library functions; LISP; Lisp
                  top level; Lisp-based; machines; open systems;
                  operating systems (computers); parallel; Parallel
                  Virtual Machine; parallel/concurrent computing
                  facility; programming; programs; PVM Library routines;
                  run-time server; software libraries; software package;
                  software packages; symbolic computation systems;
                  virtual",
  treatment =     "P Practical",
}
@Article{Lin:1995:DNC,
  author =        "Mengjou Lin and J. Hsieh and D. H. C. Du and J. P.
                  Thomas and J. A. MacDonald",
  title =         "Distributed network computing over local {ATM}
                  networks",
  journal =       j-IEEE-J-SEL-AREAS-COMMUN,
  volume =        "13",
  number =        "4",
  pages =         "733--748",
  month =         may,
  year =          "1995",
  CODEN =         "ISACEM",
  DOI =           "https://doi.org/10.1109/49.382163",
  ISSN =          "0733-8716 (print), 1558-0008 (electronic)",
  ISSN-L =        "0733-8716",
  bibdate =       "Sun Dec 22 10:20:45 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "Adv. Technol. Group, Apple Comput. Inc., Cupertino,
                  CA, USA",
  classification = "B0290H (Linear algebra); B0290P (Differential
                  equations); B6150M (Protocols); B6210L (Computer
                  communications); B6230 (Switching centres and
                  equipment); C4140 (Linear algebra); C4170 (Differential
                  equations); C5220P (Parallel architecture); C5620L
                  (Local area networks); C5640 (Protocols); C5670
                  (Network performance); C6150J (Operating systems)",
  fjournal =      "IEEE Journal on Selected Areas in Communications",
  keywords =      "Application programming interfaces; ASX-100 ATM
                  switch; Asynchronous transfer mode; ATM API; BSD socket
                  programming interface; Communication performance;
                  Communication protocol layer; Distributed network
                  computing; Distributed programming; End-to-end
                  communication; Fore Systems; High-speed local area
                  networks; High-speed network standards; Local ATM
                  network; Message passing library; Parallel matrix
                  multiplication; Parallel virtual machine; Performance
                  characteristics; Processors; Remote procedure call;
                  Workstations",
  thesaurus =     "Application program interfaces; Asynchronous transfer
                  mode; Local area networks; Matrix multiplication;
                  Partial differential equations; Performance evaluation;
                  Pipeline processing; Protocols; Remote procedure
                  calls",
}
@Article{Liu:1995:WCD,
  author =        "Xiaomao Liu",
  title =         "Workstations cluster for distributed supercomputing",
  journal =       j-MINI-MICRO-SYSTEMS,
  volume =        "16",
  number =        "2",
  pages =         "45--52",
  month =         feb,
  year =          "1995",
  CODEN =         "XWJXEH",
  ISSN =          "1000-1220",
  bibdate =       "Sun Dec 22 10:20:45 MST 1996",
  bibsource =     "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =   "North China Inst. of Comput. Technol., Beijing,
                  China",
  classification = "C5620L (Local area networks); C6150N (Distributed
                  systems software)",
  fjournal =      "Mini-Micro Systems",
  keywords =      "Active message communication; Distributed
                  supercomputing; Global UNIX; MPI; Workstations
                  cluster",
  language =      "Chinese",
  pubcountry =    "China",
  thesaurus =     "Distributed processing; Local area networks",
}
@InProceedings{Lou:1995:PIN,
  author =       "J. Z. Lou",
  title =        "A parallel incompressible {Navier--Stokes} solver with
                  multigrid iterations",
  crossref =     "Bailey:1995:PSS",
  pages =        "167--168",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Jet Propulsion Lab., California Inst. of Technol.,
                  Pasadena, CA, USA",
  classification = "A0230 (Function theory, analysis); A0260 (Numerical
                  approximation and analysis); A0270 (Computational
                  techniques); A4710 (General fluid dynamics theory,
                  simulation and other computational methods); C4170
                  (Differential equations); C4240P (Parallel programming
                  and algorithm theory); C6110P (Parallel programming);
                  C7320 (Physics and chemistry computing)",
  keywords =     "Domain-decomposition strategy; Efficient
                  finite-difference incompressible Navier--Stokes fluid;
                  Intel Delta; Intel Paragon; Message-passing; Multigrid
                  iterations; Multigrid scheme; Parallel incompressible
                  Navier--Stokes solver; Rectangular processor meshes;
                  Second-order projection method; Staggered grid;
                  Template code",
  thesaurus =    "Finite difference methods; Message passing;
                  Navier--Stokes equations; Parallel algorithms; Physics
                  computing",
}
@InProceedings{Ludwig:1995:PPF,
  author =       "T. Ludwig and S. Lamberts",
  title =        "{PFSLib} --- a parallel file system for workstation
                  clusters",
  crossref =     "Malyshkin:1995:PCT",
  pages =        "246--251",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Inst. f{\"u}r Inf., Tech. Univ. M{\"u}nchen, Germany",
  classification = "C6110P (Parallel programming); C6120 (File
                  organisation); C6150N (Distributed systems software)",
  keywords =     "Parallel file system; Parallel programming
                  environments; Performance results; PFS; PFSLib library;
                  Workstation clusters",
  thesaurus =    "File organisation; Parallel programming; Software
                  performance evaluation",
}
@InProceedings{Lumsdaine:1995:WIM,
  author =       "A. Lumsdaine and J. M. Squyres and M. W. Reichelt",
  title =        "Waveform iterative methods for parallel solution of
                  initial value problems",
  crossref =     "IEEE:1995:PSP",
  pages =        "88--97",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. Sci. and Eng., Notre Dame Univ., IN,
                  USA",
  classification = "C4130 (Interpolation and function approximation);
                  C4170 (Differential equations); C4240P (Parallel
                  programming and algorithm theory); C5440
                  (Multiprocessing systems)",
  keywords =     "Communication latency; Differential equations;
                  Differential-algebraic equations; Initial value
                  problems; Linear system; Message-passing; MOSFET
                  simulation; MPI-based implementation; Parallel
                  solution; Semiconductor device simulation program;
                  Synchronization; Time dependent semiconductor
                  drift-diffusion equations; Waveform iterative methods;
                  Waveform relaxation; Workstations",
  thesaurus =    "Circuit analysis computing; Differential equations;
                  Digital simulation; Initial value problems; Iterative
                  methods; Message passing; Parallel algorithms",
}
@InProceedings{Manke:1995:MPP,
  author =       "J. W. Manke and J. C. Patterson",
  title =        "Message passing performance of {Intel Paragon}, {IBM
                  SP1} and {CRAY T3D} using {PVM}",
  crossref =     "Bailey:1995:PSS",
  pages =        "768--769",
  year =         "1995",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Boeing Comput. Services, Seattle, WA, USA",
  classification = "C5440 (Multiprocessing systems); C5470 (Performance
                  evaluation and testing); C6150N (Distributed systems
                  software)",
  corpsource =   "Boeing Comput. Services, Seattle, WA, USA",
  keywords =     "all-to-all communication; All-to-all communication;
                  Cray computers; CRAY T3D; distributed applications;
                  Distributed applications; IBM computers; IBM SP1; Intel
                  Paragon; message passing; Message passing performance;
                  message passing time model; Message passing time model;
                  MPP machines; multiprocessing systems; nodes; Nodes;
                  performance; performance evaluation; PVM; recursive
                  doubling; Recursive doubling; scalability; Scalability;
                  speed; Speed; timing",
  thesaurus =    "Cray computers; IBM computers; Message passing;
                  Multiprocessing systems; Performance evaluation;
                  Timing",
  treatment =    "T Theoretical or Mathematical",
}
@InProceedings{Mantovani:1995:HPS,
  author =       "M. L. Mantovani and M. Malagoli",
  title =        "Highly parallel {SCF} calculation: the {SYSMO}
                  Program",
  crossref =     "IEEE:1995:PEW",
  pages =        "502--507",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "CICAIA, Modena Univ., Italy",
  classification = "C4240P (Parallel programming and algorithm theory);
                  C7320 (Physics and chemistry computing)",
  keywords =     "Highly parallel SCF calculation; Linear Combination of
                  Atomic Orbitals Self Consistent Field algorithm;
                  Parallel implementation; Parallel Virtual Machine;
                  Scalability; Single program multiple data level; SYSMO
                  program; System Modena",
  thesaurus =    "Chemistry; Chemistry computing; LCAO calculations;
                  Parallel algorithms; SCF calculations",
}
@InProceedings{Martin:1995:DPC,
  author =       "I. Martin and J. C. Fabero and F. Tirado and A.
                  Bautista",
  title =        "Distributed Parallel Computers versus {PVM} on a
                  Workstation Cluster in the Simulation of Time Dependent
                  Partial Differential Equations",
  crossref =     "IEEE:1995:PEW",
  pages =        "20--26",
  year =         "1995",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. de Inf. y Autom., Univ. Complutense de Madrid,
                  Spain",
  classification = "A0270 (Computational techniques); A0340K (Waves and
                  wave propagation: general mathematical aspects); C4170
                  (Differential equations); C6110P (Parallel
                  programming)",
  corpsource =   "Dept. de Inf. y Autom., Univ. Complutense de Madrid,
                  Spain",
  keywords =     "distributed parallel computers; equation; finite
                  difference method; large-scale problems; message
                  passing; numerical simulation; parallel; parallel
                  algorithms; parallel computing; parallel machine;
                  partial differential equations; performance evaluation;
                  PVM; Schr{\"o}dinger; Schr{\"o}dinger equation;
                  Sparc-stations; time dependent; wave equations;
                  workstation cluster; Workstation cluster;
                  Distributed parallel computers; Time dependent; Partial
                  differential equations; Parallel numerical simulation;
                  Message passing parallel machine; Finite difference
                  method; Parallel computing; Large-scale problems",
  sponsororg =   "Euromicro; Assoc. Italiana per Inf. Calcolo Autom",
  thesaurus =    "Parallel algorithms; Performance evaluation;
                  Schr{\"o}dinger equation; Wave equations",
  treatment =    "T Theoretical or Mathematical",
}
@Article{Matise:1995:PCG,
  author =       "T. C. Matise and M. D. Schroeder and D. M. Chiarulli
                  and D. E. Weeks",
  title =        "Parallel Computation of Genetic Likelihoods Using
                  {CRI-MAP}, {PVM}, and a Network of Distributed
                  Workstations",
  journal =      j-HUMAN-HEREDITY,
  volume =       "45",
  number =       "2",
  pages =        "103--??",
  month =        "????",
  year =         "1995",
  CODEN =        "HUHEAS",
  ISSN =         "0001-5652",
  ISSN-L =       "0001-5652",
  bibdate =      "Mon Jan 15 15:32:53 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Human Heredity",
}
@Article{Mattson:1995:PEP,
  author =       "Timothy G. Mattson",
  title =        "Programming Environments for Parallel and Distributed
                  Computing: a Comparison of {P4}, {PVM}, {Linda}, and
                  {TCGMSG}",
  journal =      j-IJSAHPC,
  volume =       "9",
  number =       "2",
  pages =        "138--161",
  month =        "Summer",
  year =         "1995",
  CODEN =        "IJSCFG",
  ISSN =         "1078-3482",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "Compendex database;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib; UnCover
                  library database",
  abstract =     "Parallel programmers must choose from a confusing
                  array of parallel programming environments. When
                  success has to be measured by the success of
                  application-software development rather than
                  theoretical research, the choice must be made quickly
                  without the luxury of experimentation. In this paper,
                  we help the programmer make this choice by looking
                  closely at four of the most heavily used portable
                  programming environments --- p4, PVM, TCGMSG, and
                  Linda. For each of these programming environments, we
                  look at three different programs: one that computes
                  $\pi$ by numerical integration and two that benchmark
                  communication performance. The four programming
                  environments are analyzed in terms of performance,
                  support, ease of coding, and ease of debugging.",
  acknowledgement = ack-nhfb,
  affiliation =  "Intel Corp",
  affiliationaddress = "Beaverton, OR, USA",
  classification = "722.3; 722.4; 723.1; 723.5; 921.6; C0310F (Software
                  development management); C6110P (Parallel programming);
                  C6115 (Programming support); C6150N (Distributed
                  systems software)",
  corpsource =   "Div. of Supercomputer Syst., INTEL Corp., Beaverton,
                  OR, USA",
  fjournal =     "International Journal of Supercomputer Applications
                  and High Performance Computing",
  journalabr =   "Int J Supercomput Appl High Perform Comput",
  keywords =     "Application software development; application software
                  development; communication; Communication benchmarks;
                  Communication performance; Computer software; Data
                  communication systems; Distributed computer systems;
                  Distributed computing; distributed computing;
                  Integration; Linda; Numerical integration; numerical
                  integration; P4; p4; parallel; Parallel processing
                  systems; Parallel programmers; parallel programmers;
                  parallel programming; Parallel programming
                  environments; Parallel virtual machine; performance; Pi
                  calculation; pi calculation; portability; Portable
                  programming environments; portable programming
                  environments; Program debugging; Programming
                  environments; programming environments; PVM; software;
                  Software engineering; software reviews; TCGMSG",
  thesaurus =    "Parallel programming; Programming environments;
                  Software portability; Software reviews",
  treatment =    "P Practical",
}
@InProceedings{Mehra:1995:AIM,
  author =       "P. Mehra and B. {Van Voorst} and J. Yan",
  title =        "Automated Instrumentation, Monitoring and
                  Visualization of {PVM} Programs",
  crossref =     "Bailey:1995:PSS",
  pages =        "832--837",
  year =         "1995",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Recom Technol. Inc., NASA Ames Res. Center, Moffett
                  Field, CA, USA",
  classification = "C6110P (Parallel programming); C6115 (Programming
                  support); C6150G (Diagnostic, testing, debugging and
                  evaluating systems); C6150N (Distributed systems
                  software)",
  corpsource =   "Recom Technol. Inc., NASA Ames Res. Center, Moffett
                  Field, CA, USA",
  keywords =     "Automated; automated instrumentation; Automated
                  instrumentation; Automated Instrumentation and
                  Monitoring System; data visualisation; Instrumentation
                  and Monitoring System; message passing; message-passing
                  parallel programs; Message-passing parallel programs;
                  monitoring; parallel programming; parallel programs;
                  Parallel programs; Parallel Virtual Machine;
                  performance-debugging; Performance-debugging toolkit;
                  program debugging; program monitoring; Program
                  monitoring; program visualization; Program
                  visualization; programming environments; PVM programs;
                  system; toolkit; visual programming; workstation
                  networks; Workstation networks",
  thesaurus =    "Data visualisation; Message passing; Parallel
                  programming; Program debugging; Programming
                  environments; System monitoring; Visual programming",
  treatment =    "P Practical",
}
@Article{Michielse:1995:PMU,
  author =       "Peter Michielse",
  title =        "Parallel multigrid using {PVM}",
  journal =      j-APPL-NUM-MATH,
  volume =       "19",
  number =       "1--2",
  pages =        "63--69",
  month =        nov,
  year =         "1995",
  CODEN =        "ANMAEL",
  ISSN =         "0168-9274 (print), 1873-5460 (electronic)",
  ISSN-L =       "0168-9274",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "Compendex database;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "This paper discusses an implementation of a parallel
                  multigrid method using Parallel Virtual Machine (PVM).
                  The basics described here apply to general parallel
                  computers, either with shared memory or distributed
                  memory systems. The actual implementation has been
                  performed on both type of systems, although we will
                  focus on distributed memory systems in this paper. The
                  distributed memory implementation has been done using
                  PVM on Convex MetaSeries machines. The speed-up results
                  will be analyzed with respect to computational work and
                  communicational overhead.",
  acknowledgement = ack-nhfb,
  affiliation =  "CONVEX Computer Corp",
  affiliationaddress = "Utrecht, Neth",
  classification = "512.1.1; 721.1; 722.1; 722.4; 723.5; 921.6; C4170
                  (Differential equations); C4240P (Parallel programming
                  and algorithm theory); C5440 (Multiprocessing systems);
                  C7310 (Mathematics computing)",
  corpsource =   "CONVEX Comput. Corp., Utrecht, Netherlands",
  fjournal =     "Applied Numerical Mathematics: Transactions of IMACS",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01689274",
  journalabr =   "Appl Numer Math",
  keywords =     "Approximation theory; communicational overhead;
                  Computational methods; computational work; Computer
                  simulation; Convex MetaSeries; Data storage equipment;
                  differential equations; Distributed computer systems;
                  distributed memory systems; Distributed memory systems;
                  machines; mathematics computing; Numerical methods;
                  parallel algorithms; parallel multigrid; Parallel
                  multigrid; Parallel processing systems; parallel
                  virtual machine; Parallel virtual machine; Petroleum
                  reservoirs; Reservoir simulators; shared memory; Shared
                  memory systems; systems",
  pubcountry =   "Netherlands",
  treatment =    "A Application; P Practical",
}
@Article{Mirvis:1995:HML,
  author =       "Y. Mirvis and F. Abdi and B. Lajevardi and P. Murthy",
  title =        "Hierarchical multi-level optimization solution for
                  massive parallel simulation of composite system",
  journal =      j-AIAA-ASME-ASCE-AHS-STRUCT-STRUCT-DYN-MAT-CONF,
  volume =       "4",
  month =        "????",
  year =         "1995",
  CODEN =        "CPSCDO",
  ISSN =         "0273-4508",
  bibdate =      "Fri May 24 09:58:00 MDT 1996",
  bibsource =    "Compendex database;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Multilevel optimization techniques have been utilized
                  to speed up simulation processing time for the analysis
                  and modeling of high temperature composite structures.
                  The Network Computing System (NCS) tools were utilized
                  using the GENOA-Maestro, and Parallel Virtual Machine
                  (PVM) toolkits for heterogeneous distributed computing,
                  to make it portable across the range of parallel
                  hardware architecture.",
  acknowledgement = ack-nhfb,
  affiliation =  "Alpha STAR Research Corp",
  affiliationaddress = "Los Angeles, CA, USA",
  classification = "408.1; 415.4; 721.1; 722.4; 723.5; 731.1",
  conference =   "Proceedings of the 36th AIAA\slash ASME\slash
                  ASCE\slash AHS\slash ASC Structures, Structural
                  Dynamics, and Materials Conference and AIAA\slash ASME
                  Adaptive Structures Forum. Part 4 (of 5)",
  fjournal =     "AIAA/ASME/ASCE/AHS Structures, Structural Dynamics \&
                  Materials Conference --- Collection of Technical
                  Papers",
  journalabr =   "AIAA ASME ASCE AHS Struct Struct Dyn Mater Conf
                  Collect Tech Pap",
  keywords =     "Composite structures; Computational complexity;
                  Computer aided analysis; Computer simulation;
                  Constraint theory; Hierarchical systems; Mathematical
                  models; Multilevel optimization technique; Network
                  computing system (NCS); Optimization; Parallel
                  processing systems; Parallel virtual machine (PVM);
                  Software package GENOA-Maestro; Software package PVM;
                  Structural analysis",
  meetingdate =  "Apr 10--13 1995",
}
@InProceedings{Morinishi:1995:PIB,
  author =       "K. Morinishi and N. Satofuka",
  title =        "Parallel implementation of the {Boltzmann} equation
                  solvers using {PVM}",
  crossref =     "Satofuka:1995:PCF",
  pages =        "339--346",
  year =         "1995",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A4710 (General fluid dynamics theory, simulation and
                  other computational methods); A4745 (Rarefied gas
                  dynamics); C4180 (Integral equations); C4240P (Parallel
                  programming and algorithm theory); C7320 (Physics and
                  chemistry computing); C7460 (Aerospace engineering
                  computing)",
  corpsource =   "Dept. of Mech. and Syst. Eng., Kyoto Inst. of
                  Technol., Japan",
  keywords =     "aerodynamics; aerospace; aerospace computing; BGK
                  model; Boltzmann; Boltzmann collision integral;
                  Boltzmann equation; CFD; computational; computing;
                  digital simulation; equation solver; external flows;
                  flow simulation; fluid dynamics; message; message
                  passing software; NACA0012 airfoil; parallel
                  programming; passing; physics computing; PVM; rarefied;
                  rarefied gas flow",
  pubcountry =   "Netherlands",
  treatment =    "P Practical; T Theoretical or Mathematical",
}
@InProceedings{Mork:1995:DPP,
  author =       "P. Mork",
  title =        "Debugging parallel programs with execution tracing",
  crossref =     "Ferenczi:1995:PAH",
  pages =        "176--183",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Control Eng., Miskolc Univ., Hungary",
  classification = "C6110B (Software engineering techniques); C6110P
                  (Parallel programming); C6115 (Programming support);
                  C6130B (Graphics techniques); C6150G (Diagnostic,
                  testing, debugging and evaluating systems); C6150N
                  (Distributed systems software)",
  keywords =     "CASE tool; Converter; Datafile; Execution tracing;
                  Graphical tool; PACVIS; ParaGraph; Parallel program
                  debugging; Parallel software engineering; Pvm3; Raw
                  trace data transformation; SEPP project; Software
                  Engineering for Parallel Processing project; Tools;
                  Trace file visualization; Visualization program",
  thesaurus =    "Computer aided software engineering; Data
                  visualisation; Parallel programming; Program debugging;
                  Software tools",
}
@Article{Morton:1995:LLP,
  author =       "Don Morton and Kefei Wang and David O. Ogbe",
  title =        "Lessons learned in porting {Fortran\slash PVM} code to
                  the {Cray T3D}",
  journal =      j-IEEE-PAR-DIST-TECH,
  volume =       "3",
  number =       "1",
  pages =        "4--11",
  month =        "Spring",
  year =         "1995",
  CODEN =        "IPDTEX",
  DOI =          "https://doi.org/10.1109/88.384580",
  ISSN =         "1063-6552 (print), 1558-1861 (electronic)",
  ISSN-L =       "1063-6552",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "Compendex database;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "With an extra work from the programmer, the Cray T3D
                  offers low-level facilities for achieving substantial
                  performance gains. Because of this, it is often
                  necessary to consider the tradeoffs between performance
                  gains and coding effort. Here, provided is a first-hand
                  account of the issues in porting Fortran\slash PVM code
                  to the Cray T3D. As a new evolving product, occasional
                  problems with the T3D should be expected.",
  acknowledgement = ack-nhfb,
  affiliation =  "Cameron Univ",
  affiliationaddress = "Lawton, OK, USA",
  classification = "722.2; 722.3; 722.4; 723.1; 723.1.1; 723.2; C5440
                  (Multiprocessing systems); C6110B (Software engineering
                  techniques); C6110P (Parallel programming); C6140D
                  (High level languages)",
  fjournal =     "IEEE parallel and distributed technology: systems and
                  applications",
  journalabr =   "IEEE Parallel Distrib Technol",
  keywords =     "Algorithms; Central processing unit; Code porting;
                  Codes (symbols); Coding effort; coding effort;
                  Computer aided software engineering; Computer software
                  portability; Computer workstations; Cray computers;
                  Cray T3D; Cray T3D computer; Data communication
                  systems; Distributed computer systems; FORTRAN;
                  Fortran (programming language); FORTRAN (programming
                  language); Fortran/PVM code porting; Interfaces
                  (computer); low-level facilities; Low-level facilities;
                  message passing; parallel machines; parallel
                  programming; Parallel virtual machine; performance
                  gains; Performance gains; Program debugging; software
                  portability; Software prototyping; Subroutines;
                  Supercomputers",
  thesaurus =    "Cray computers; FORTRAN; Message passing; Parallel
                  machines; Parallel programming; Software portability",
  treatment =    "P Practical",
}
@InProceedings{Nguyen:1995:SPI,
  author =       "D. Nguyen and B. Hillberg",
  title =        "Simulations of Pinhole Imaging for {AXAF}: Distributed
                  Processing Using the {MPI} Standard",
  crossref =     "Shaw:1995:ADA",
  pages =        "361--366 (or 361--363??)",
  year =         "1995",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A9575P (Mathematical and computer techniques in
                  astronomy); C6110P (Parallel programming); C6185
                  (Simulation techniques); C6150N (Distributed systems
                  software); C7350 (Astronomy and astrophysics
                  computing)",
  conftitle =    "Astronomical Data Analysis Software and Systems IV
                  Meeting",
  corpsource =   "Smithsonian Astrophys. Obs., Cambridge, MA, USA",
  keywords =     "Application Programming Interface; astronomy
                  computing; AXAF mission; digital simulation;
                  distributed processing; LAM programming environment;
                  Local Area Multicomputer; memory intensive task;
                  message passing; Message Passing Interface; MPI
                  standard; Ohio Supercomputer Center; parallel mode
                  simulation; parallel processing; parallel programming;
                  pinhole imaging; pinhole simulation program; sequential
                  mode simulation; software packages; software
                  portability; workstation cluster; X-ray astronomy",
  treatment =    "T Theoretical or Mathematical; A Application",
}
@Article{Novotny:1995:BPP,
  author =       "Mark Novotny",
  title =        "{BOOKS}: {PVM} --- Parallel Virtual Machine: a Users'
                  Guide and Tutorial for Networked Parallel Computing",
  journal =      j-COMPUT-PHYS,
  volume =       "9",
  number =       "6",
  pages =        "607--??",
  month =        nov,
  year =         "1995",
  CODEN =        "CPHYE2",
  DOI =          "https://doi.org/10.1063/1.4823450",
  ISSN =         "0894-1866 (print), 1558-4208 (electronic)",
  ISSN-L =       "0894-1866",
  bibdate =      "Mon Jan 15 15:32:53 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Computers in Physics",
}
@Article{Novotny:1995:BRA,
  author =       "Mark Novotny and Susan McKay and Wolfgang Christian",
  title =        "Book Review: {Al Geist, Adam Beguelin, Jack Dongarra,
                  Weicheng Jiang, Robert Manchek, and Vaidy Sunderam,
                  \booktitle{{PVM} --- Parallel Virtual Machine: a Users'
                  Guide and Tutorial for Networked Parallel Computing}}",
  journal =      j-COMPUT-PHYS,
  volume =       "9",
  number =       "6",
  pages =        "607--??",
  month =        nov,
  year =         "1995",
  CODEN =        "CPHYE2",
  DOI =          "https://doi.org/10.1063/1.4823450",
  ISSN =         "0894-1866 (print), 1558-4208 (electronic)",
  ISSN-L =       "0894-1866",
  bibdate =      "Wed Apr 10 08:45:57 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                  http://www.math.utah.edu/pub/tex/bib/computphys.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://aip.scitation.org/doi/10.1063/1.4823450",
  acknowledgement = ack-nhfb,
  ajournal =     "Comput. Phys",
  fjournal =     "Computers in Physics",
  journal-URL =  "https://aip.scitation.org/journal/cip",
}
@InProceedings{Nupairoj:1995:PES,
  author =       "N. Nupairoj and L. M. Ni",
  title =        "Performance evaluation of some {MPI} implementations
                  on workstation clusters",
  crossref =     "IEEE:1995:PSP",
  pages =        "98--105",
  year =         "1995",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. Sci., Michigan State Univ., East
                  Lansing, MI, USA",
  classification = "C5220P (Parallel architecture); C5440
                  (Multiprocessing systems); C5470 (Performance
                  evaluation and testing); C6110B (Software engineering
                  techniques); C6150N (Distributed systems software)",
  conftitle =    "Proceedings Scalable Parallel Libraries Conference",
  corpsource =   "Dept. of Comput. Sci., Michigan State Univ., East
                  Lansing, MI, USA",
  keywords =     "communication library; Communication library;
                  distributed memory systems; distributed-memory
                  computing systems; Distributed-memory computing
                  systems; high performance computing; High performance
                  computing; message passing; Message Passing Interface;
                  message-passing; Message-passing; MPI implementations;
                  MPI specification; performance evaluation; Performance
                  evaluation; software libraries; standards; workstation
                  clusters; Workstation clusters; workstations",
  sponsororg =   "Mississippi State Univ.; NSF",
  thesaurus =    "Distributed memory systems; Message passing;
                  Performance evaluation; Software libraries; Standards;
                  Workstations",
  treatment =    "P Practical",
}
@Article{Oakley:1995:ADR,
  author =       "D. R. Oakley and N. F. {Knight, Jr.} and D. D.
                  Warner",
  title =        "Adaptive dynamic relaxation algorithm for non-linear
                  hyperelastic structures. {III}. {Parallel}
                  implementation",
  journal =      j-COMPUT-METH-APPL-MECH-ENG,
  volume =       "126",
  number =       "1--2",
  pages =        "111--129",
  month =        sep,
  year =         "1995",
  CODEN =        "CMMECC",
  ISSN =         "0045-7825, 0374-2830",
  ISSN-L =       "0045-7825",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Appl. Res. Assoc. Inc., Raleigh, NC, USA",
  classification = "C4185 (Finite element analysis); C4240P (Parallel
                  programming and algorithm theory); C5440
                  (Multiprocessing systems); C6150N (Distributed systems
                  software); C7440 (Civil and mechanical engineering
                  computing)",
  fjournal =     "Computer Methods in Applied Mechanics and
                  Engineering",
  keywords =     "128-Processor Intel hypercube; Adaptive dynamic
                  relaxation algorithm; Efficient parallel
                  implementation; Engineering workstation cluster;
                  Frictionless contact; Interprocessor communication;
                  Nonlinear hyperelastic structures; Nonlinear static
                  analysis; Parallel-processing resource; PVM; Relative
                  speedups; Scalability; Three-dimensional hyperelastic
                  systems; Two-dimensional hyperelastic systems",
  pubcountry =   "Netherlands",
  thesaurus =    "Adaptive systems; Elasticity; Engineering
                  workstations; Finite element analysis; Hypercube
                  networks; Local area networks; Parallel algorithms;
                  Relaxation; Structural engineering computing",
}
@InProceedings{Olszewski:1995:TCC,
  author =       "Luke Olszewski",
  title =        "A timing comparison of the conjugate gradient and
                  {Gauss--Seidel} parallel algorithms in a
                  one-dimensional flow equation using {PVM}",
  crossref =     "ACM:1995:PAS",
  pages =        "205--212",
  year =         "1995",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "Compendex database;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "The development of parallel processing came about due
                  to the ineffectiveness of a single processor to
                  accommodate the solutions of large scale problems in a
                  reasonable amount of time. In this paper, we shall
                  introduce one such problem, and discuss the
                  implementation of two parallel algorithms applied to
                  the linear approximations. This study will illustrate
                  how an approximation method which has a faster rate of
                  convergence may not necessarily produce the best
                  solution time.",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Math. and Comput. Sci., Georgia Southern
                  Univ.",
  affiliationaddress = "Statesboro, GA, USA",
  classification = "631.1; 722.4; 723.1; 723.5; 921.1; 921.6; C4130
                  (Interpolation and function approximation); C4140
                  (Linear algebra); C4240P (Parallel programming and
                  algorithm theory); C7310 (Mathematics computing); C7340
                  (Geophysics computing)",
  conference =   "Proceedings of the 33rd Annual Southeast Conference",
  corpsource =   "Dept. of Math. and Comput. Sci., Georgia Southern
                  Univ., Statesboro, GA, USA",
  journalabr =   "Proc Annu Southeast Conf",
  keywords =     "Approximation theory; approximations; Computational
                  methods; Conjugate gradient; conjugate gradient;
                  conjugate gradient methods; Conjugate gradient parallel
                  algorithms; Convergence; convergence; convergence of
                  numerical; Convergence of numerical methods; Flow of
                  fluids; Gauss Seidel parallel algorithms; Gauss--Seidel
                  parallel algorithm; geophysics computing; hydrology;
                  iterative methods; Large scale problems; large scale
                  problems; Large scale systems; linear; Linear
                  approximations; Mathematical models; mathematics
                  computing; methods; Numerical methods; One dimensional
                  flow equation; One-dimensional flow equation;
                  one-dimensional flow equation; Parallel algorithms;
                  parallel algorithms; Parallel processing; parallel
                  processing; Parallel processing systems; Parallel
                  virtual machine; PVM; Richards equation; Timing;
                  timing; Timing comparison",
  meetingaddress = "Clemson, SC, USA",
  meetingdate =  "Mar 17--18 1995",
  meetingdate2 = "03/17--18/95",
  sponsororg =   "ACM",
  thesaurus =    "Conjugate gradient methods; Convergence of numerical
                  methods; Geophysics computing; Hydrology; Iterative
                  methods; Mathematics computing; Parallel algorithms",
  treatment =    "T Theoretical or Mathematical",
}
@InProceedings{Ouenes:1995:PRA,
  author =       "A. Ouenes and W. W. Weiss and J. A. Sultan and J.
                  Anwar",
  title =        "Parallel Reservoir Automatic History Matching Using a
                  Network of Workstations and {PVM}",
  crossref =     "Anonymous:1995:RSS",
  pages =        "125--134",
  year =         "1995",
  bibdate =      "Thu Feb 29 17:59:11 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@InProceedings{Panda:1995:GRW,
  author =       "D. K. Panda",
  title =        "Global reduction in wormhole k-ary n-cube networks
                  with multidestination exchange worms",
  crossref =     "IEEE:1995:PIP",
  pages =        "652--659",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. and Inf. Sci., Ohio State Univ.,
                  Columbus, OH, USA",
  classification = "C4230M (Multiprocessor interconnection); C5220P
                  (Parallel architecture); C5440 (Multiprocessing
                  systems)",
  keywords =     "Barrier synchronization operations; Communication
                  startup time; Complete global reduction; Data size;
                  Fast global reduction; Global reduction; Message
                  passing interface standard; Multidestination exchange
                  worms; Multidestination message passing mechanism;
                  Pairwise exchange worms; System size; Unicast-based
                  message passing; Wormhole k-ary n-cube networks",
  thesaurus =    "Hypercube networks; Message passing; Synchronisation",
}
@InProceedings{Panda:1995:IDE,
  author =       "D. K. Panda",
  title =        "Issues in designing efficient and practical algorithms
                  for collective communication on wormhole-routed
                  systems",
  crossref =     "Agrawal:1995:PIW",
  pages =        "8--15",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Comput. and Inf. Sci., Ohio State Univ.,
                  Columbus, OH, USA",
  classification = "C4240P (Parallel programming and algorithm theory);
                  C5220P (Parallel architecture); C5470 (Performance
                  evaluation and testing); C6150N (Distributed systems
                  software)",
  keywords =     "Collective communication; Collective communication
                  operations; Communication types; Message Passing
                  Interface; MPI standard; Performance evaluation;
                  Practical algorithms; Scalable parallel systems;
                  Wormhole routed systems; Wormhole-routed systems",
  thesaurus =    "Message passing; Multiprocessor interconnection
                  networks; Parallel algorithms; Parallel machines",
}
@InProceedings{Pennington:1995:DHC,
  author =       "R. L. Pennington",
  title =        "Distributed and heterogeneous computing",
  crossref =     "Vandoni:1995:CSC",
  pages =        "25--57",
  year =         "1995",
  bibdate =      "Sun Dec 22 10:20:45 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Pittsburgh Supercomputing Centre, PA, USA",
  classification = "C6110P (Parallel programming); C6150N (Distributed
                  systems software)",
  keywords =     "C; Distributed computing; Fortran; Heterogeneous
                  computing; Message passing; Programming; PVM",
  thesaurus =    "Message passing; Parallel machines; Parallel
                  programming; Virtual machines",
}
@InProceedings{Periyathamby:1995:NSG,
  author = "U. Periyathamby and B. C. Khoo and K. S. Yeo and Q. X.
    Wang",
  title = "A Numerical Simulation of the Growth and Collapse of
    Vapour Cavity Near a Free Surface on Distributed
    Computing Through {PVM}",
  crossref = "Bilger:1995:AFM",
  pages = "815--818",
  year = "1995",
  bibdate = "Wed Aug 14 09:02:28 MDT 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Article{Pfenning:1995:OCP,
  author = "J{\"o}rg-Thomas Pfenning and Christoph Moll",
  title = "Optimized communication patterns on workstation
    clusters",
  journal = j-PARALLEL-COMPUTING,
  volume = "21",
  number = "3",
  pages = "373--388",
  day = "10",
  month = mar,
  year = "1995",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Fri Aug 6 10:14:24 MDT 1999",
  bibsource = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1995&volume=21&issue=3;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1995&volume=21&issue=3&aid=964",
  acknowledgement = ack-nhfb,
  affiliation = "Koln Univ., Germany",
  classification = "C4240P (Parallel programming and algorithm theory);
    C5220P (Parallel architecture); C5440 (Multiprocessing
    systems); C6110P (Parallel programming); C6150N
    (Distributed systems software)",
  corpsource = "Koln Univ., Germany",
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
  keywords = "Data parallel programming model; Dynamic loop
    scheduling algorithm; FDDI-ring; High startup
    latencies; Limited communication bandwidth; Matrix
    multiplication; Network usage; Optimized communication
    patterns; Parallel communications architecture; PVM
    message passing library; Sequential communication;
    Sparse communication patterns; Workstation clusters",
  pubcountry = "Netherlands",
  thesaurus = "Message passing; Parallel programming; Scheduling;
    Workstations",
}
@Article{Piscaglia:1995:DOC,
  author = "P. Piscaglia and B. Macq and P. Maes",
  title = "Distributed optimization of codebooks",
  journal = j-SIGNAL-PROCESS-IMAGE-COMMUN,
  volume = "7",
  number = "3",
  pages = "211--223",
  month = sep,
  year = "1995",
  CODEN = "SPICEF",
  ISSN = "0923-5965 (print), 1879-2677 (electronic)",
  ISSN-L = "0923-5965",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Univ. Catholique de Louvain, Belgium",
  classification = "B6120B (Codes); B6140C (Optical information, image
    and video signal processing); B6210L (Computer
    communications); C5260B (Computer vision and image
    processing techniques); C5440 (Multiprocessing
    systems); C5620L (Local area networks)",
  fjournal = "Signal Processing: Image Communication",
  keywords = "Codebooks; Codebooks optimization; Communication
    bandwidth minimisation; Computer network; Distributed
    optimization; Failure robustness; General-purpose
    workstations; Image processing algorithms; LBG
    algorithm; Load balancing; Parallel virtual machine;
    Processors synchronisation; Specialized library",
  pubcountry = "Netherlands",
  thesaurus = "Image coding; Local area networks; Parallel machines;
    Virtual machines; Workstations",
}
@InProceedings{Plank:1995:ADC,
  author = "J. S. Plank and Youngbae Kim and J. J. Dongarra",
  title = "Algorithm-based diskless checkpointing for fault
    tolerant matrix operations",
  crossref = "IEEE:1995:DPT",
  pages = "351--360",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput. Sci., Tennessee Univ., TN, USA",
  classification = "C4130 (Interpolation and function approximation);
    C4140 (Linear algebra); C6110B (Software engineering
    techniques); C6150N (Distributed systems software);
    C7300 (Natural sciences computing)",
  keywords = "Algorithm-based diskless checkpointing; Cholesky
    factorization; Distributed scientific computations;
    Fault tolerant matrix operations; Fault-tolerance;
    High-performance implementations; IBM SP2; Long-running
    scientific computations; Low overhead; LU
    factorization; Performance; Preconditioned conjugate
    gradient; Processors; PVM networks; QR factorization;
    SUN workstations; Workstation network platform",
  thesaurus = "Conjugate gradient methods; Local area networks;
    Matrix algebra; Natural sciences computing; Software
    fault tolerance; Subroutines; Workstations",
}
@InProceedings{Prasad:1995:PPB,
  author = "S. K. Prasad and K. M. Yu",
  title = "Performance of a {PVM-based} optimistic simulation
    testbed on different parallel architectures",
  crossref = "Hamza:1995:PII",
  pages = "511--514",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440
    (Multiprocessing systems); C5470 (Performance
    evaluation and testing); C6185 (Simulation
    techniques); C7430 (Computer engineering)",
  corpsource = "Dept. of Math. and Comput. Sci., Georgia State Univ.,
    Atlanta, GA, USA",
  keywords = "architectures; bus-based shared-memory; discrete event
    simulation; dynamic time; hypercube networks;
    hypercube-based parallel computer; local-memory;
    message granularity; multiprocessor; nCUBE-II;
    optimistic discrete event simulation testbed; Parallel;
    parallel; performance evaluation; PVM-based optimistic
    simulation testbed; RS-6000; shared memory; Silicon
    Graphics 4D/GTX; systems; Unix workstations; Virtual
    Machine package; virtual machines; window",
  sponsororg = "IASTED; ISMM",
  treatment = "X Experimental",
}
@InProceedings{Puskas:1995:LBW,
  author = "Z. Puskas",
  title = "Load Balancing on Workstation Clusters Using {PVM}",
  crossref = "Ferenczi:1995:PAH",
  pages = "112--123",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Meas. and Instrum. Eng., Tech. Univ.
    Budapest, Hungary",
  classification = "C5620L (Local area networks); C6110P (Parallel
    programming); C6150J (Operating systems); C6150N
    (Distributed systems software)",
  corpsource = "Dept. of Meas. and Instrum. Eng., Tech. Univ.
    Budapest, Hungary",
  keywords = "capacity sharing; computational; Computational
    capacity sharing; distributed; distributed parallel
    system; Distributed parallel system; Distributed
    programming; load balancing; Load balancing; local area
    networks; network operating systems; parallel; Parallel
    Virtual Machine; processor-farm technique;
    Processor-farm technique; programming; Programming;
    programming models; Programming models; PVM; resource
    allocation; workstation clusters; Workstation clusters;
    workstations",
  pubcountry = "Hungary",
  thesaurus = "Local area networks; Network operating systems;
    Parallel programming; Resource allocation;
    Workstations",
  treatment = "P Practical",
}
@InProceedings{Qaddouri:1995:MFS,
  author = "A. Qaddouri and R. Roy and B. Goulard",
  title = "Multigroup flux solvers using {PVM} [{Parallel Virtual
    Machine}]",
  crossref = "ANS:1995:MCR",
  volume = "2",
  pages = "1554--1562",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A2820H (Neutron diffusion); C7470 (Nuclear
    engineering computing)",
  corpsource = "Inst. de Genie Nucleaire, Ecole Polytech. de Montreal,
    Que., Canada",
  keywords = "collision probability; distributed memory; iterative;
    multigroup flux solvers; neutron transport theory;
    nuclear engineering computing; parallel processing;
    Parallel Virtual Machine; PVM; time-independent
    transport equation",
  sponsororg = "ANS; Eur. Nucl. Soc.; Atomic Energy Soc. Japan",
  treatment = "T Theoretical or Mathematical",
}
@MastersThesis{Qu:1995:FAS,
  author = "Su Qu",
  title = "Feature-driven area-based stereo matching method on
    {PVM}",
  type = "M.S. thesis",
  school = inst-UGA,
  address = inst-UGA:adr,
  pages = "x + 110",
  year = "1995",
  bibdate = "Mon Jan 15 15:32:53 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Directed by Hamid R. Arabnia.",
  acknowledgement = ack-nhfb,
}
@InProceedings{Rambu:1995:DSS,
  author = "N. Rambu and S. Stefan and D. Borsan and S. Talpos",
  title = "A diagnostic study of some meteorological fields
    simulated with {UKMO} and {MPI} atmospheric general
    circulation models",
  crossref = "Gates:1995:PFI",
  pages = "493--498",
  year = "1995",
  bibdate = "Wed Aug 14 09:02:28 MDT 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Article{Ramon:1995:PKV,
  author = "J. Ramon and P. Pena",
  title = "Parallelization of {KENO-Va Monte Carlo} code",
  journal = j-COMP-PHYS-COMM,
  volume = "88",
  number = "1",
  pages = "76--82",
  month = jul,
  year = "1995",
  CODEN = "CPHCBZ",
  DOI = "https://doi.org/10.1016/0010-4655(95)00025-B",
  ISSN = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L = "0010-4655",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib;
    http://www.math.utah.edu/pub/tex/bib/prng.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/001046559500025B",
  acknowledgement = ack-nhfb,
  affiliation = "Consejo de Seguridad Nucl., Madrid, Spain",
  classification = "A2820H (Neutron diffusion); A2846E (Nuclear
    criticality safety); A2846G (Packaging and
    transportation of nuclear materials); C6110P (Parallel
    programming); C7470 (Nuclear engineering computing)",
  fjournal = "Computer Physics Communications",
  journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
  keywords = "CONVEX C3440; Criticality; Distributed memory systems;
    FDDI network; Fuel storage pools; HP9000/735; KENO-Va
    code; Message-passing interface; Monte Carlo code;
    Parallelization; PVM; Random numbers; SCALE system;
    Shared memory machines; Shipping casks; Transport
    equation",
  pubcountry = "Netherlands",
  thesaurus = "Monte Carlo methods; Neutron transport theory; Nuclear
    criticality safety; Nuclear engineering computing;
    Nuclear materials packaging; Parallel programming",
}
@InProceedings{Ratha:1995:CUC,
  author = "N. K. Ratha and A. K. Jain and M. J. Chung",
  title = "Clustering using a coarse-grained parallel genetic
    algorithm: a preliminary study",
  crossref = "Cantoni:1995:CCA",
  pages = "331--338",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput. Sci., Michigan State Univ., East
    Lansing, MI, USA",
  classification = "C1180 (Optimisation techniques); C1250 (Pattern
    recognition); C4240P (Parallel programming and
    algorithm theory); C6150N (Distributed systems
    software)",
  keywords = "Coarse grained parallel genetic algorithm;
    Coarse-grained parallel genetic algorithm; Complex
    optimization problems; Data sets; Distributed
    algorithm; Distributed implementation; Divide and
    conquer approach; GAs; Near linear speedup; Optimal
    minimum squared error partition; Optimization problem;
    Pattern clustering; Preliminary study; PVM; Standard
    communication library; Standard K-means clustering
    algorithm; Workstation cluster",
  thesaurus = "Distributed algorithms; Divide and conquer methods;
    Genetic algorithms; Pattern recognition; Problem
    solving",
}
@InProceedings{Ratha:1995:DED,
  author = "N. K. Ratha and T. Acar and M. Gokmen and A. K. Jain",
  title = "A distributed edge detection and surface
    reconstruction algorithm",
  crossref = "Cantoni:1995:CCA",
  pages = "149--154",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput. Sci., Michigan State Univ., East
    Lansing, MI, USA",
  classification = "B0260 (Optimisation techniques); B6140C (Optical
    information, image and video signal processing); C1180
    (Optimisation techniques); C1250 (Pattern recognition);
    C4240P (Parallel programming and algorithm theory);
    C5260B (Computer vision and image processing
    techniques)",
  keywords = "Associated energy functional; Cluster of workstations;
    Distributed edge detection; Graduated non-convexity;
    Image compression; Image restoration; Line process;
    Optimal edge assignment; Pixel gray values; PVM
    communication library; Regularization techniques;
    Scalable parallel algorithm; Surface reconstruction
    algorithm; Weak membrane",
  thesaurus = "Computer vision; Edge detection; Image restoration;
    Parallel algorithms; Simulated annealing; Surface
    reconstruction",
}
@InProceedings{Reinefeld:1995:PVE,
  author = "A. Reinefeld and V. Schnecke",
  title = "Portability versus efficiency? Parallel applications
    on {PVM} and {Parix}",
  crossref = "Fritzson:1995:PPA",
  pages = "35--49",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C1160 (Combinatorial mathematics); C1180
    (Optimisation techniques); C5620L (Local area
    networks); C5440 (Multiprocessing systems); C6110B
    (Software engineering techniques); C6110P (Parallel
    programming); C6115 (Programming support); C6150N
    (Distributed systems software)",
  corpsource = "Center for Parallel Comput., Paderborn Univ.,
    Germany",
  keywords = "algorithm architecture; benchmarks; combinatorial
    mathematics; computing; efficiency; high-level
    programming environment; LAN; local area networks;
    massively parallel transputer system; moderately
    parallel Parsytec GC/PowerPlus; optimisation; parallel
    algorithms; parallel applications; parallel
    high-performance; parallel programming; Parix
    programming model; performance; portability; portable
    programming models; processors; programming
    environments; PVM programming model; software
    performance evaluation; software portability; system;
    T805; transputer systems; Unix; UNIX workstation
    cluster; workstations",
  pubcountry = "Netherlands",
  treatment = "P Practical",
}
@Article{Reynders:1995:OOO,
  author = "John V. W. Reynders and David W. Forslund and Paul J.
    Hinker and Marydell Tholburn and David G. Kilman and
    William F. Humphrey",
  title = "{OOPS}: an object-oriented particle simulation class
    library for distributed architectures",
  journal = j-COMP-PHYS-COMM,
  volume = "87",
  number = "1--2",
  pages = "212--224",
  day = "2",
  month = may,
  year = "1995",
  CODEN = "CPHCBZ",
  DOI = "https://doi.org/10.1016/0010-4655(94)00172-X",
  ISSN = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L = "0010-4655",
  bibdate = "Mon Feb 13 21:29:54 MST 2012",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/001046559400172X",
  acknowledgement = ack-nhfb,
  affiliation = "Adv. Comput. Lab., Los Alamos Nat. Lab., NM, USA",
  classification = "A0270 (Computational techniques); A0520G (Classical
    ensemble theory); C5220P (Parallel architecture);
    C6110J (Object-oriented programming); C6110P (Parallel
    programming); C7320 (Physics and chemistry computing)",
  fjournal = "Computer Physics Communications",
  journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
  keywords = "Connection Machine CM5; CRI T3D; Distributed
    architectures; Materials science; Object-oriented
    particle simulation class library; OOPS; Plasma
    physics; Porous media; Portable code; PVM clusters;
    Suspension flows; Uniform high-level interface; Vortex
    simulations",
  pubcountry = "Netherlands",
  thesaurus = "C listings; Digital simulation; Multiprocessing
    programs; Object-oriented programming; Parallel
    architectures; Physics computing; Software libraries;
    Software portability; Statistical mechanics",
}
@Article{Ross:1995:DCM,
  author = "D. L. Ross and J. S. Collins and J. H. George",
  title = "A dynamic capacity model using concurrent processing",
  journal = j-NEURAL-PAR-SCI-COMPUT,
  volume = "3",
  number = "2",
  pages = "249--262",
  month = jun,
  year = "1995",
  CODEN = "NPACEM",
  ISSN = "1061-5369",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Math., Embry-Riddle Aeronaut. Univ., Daytona
    Beach, FL, USA",
  classification = "C1180 (Optimisation techniques); C5220P (Parallel
    architecture); C5440 (Multiprocessing systems); C7430
    (Computer engineering); C7460 (Aerospace engineering
    computing)",
  fjournal = "Neural, Parallel and Scientific Computations",
  keywords = "Airport capacity optimisation; Concurrent processing;
    Concurrent programming; Dynamic capacity model;
    Iterative method; National Airspace System; Optimal
    capacity profiles; Parallel virtual machine; Public
    domain software PVM; Time-varying index",
  thesaurus = "Aerospace computing; Optimisation; Parallel
    processing; Public domain software; Virtual machines",
}
@Article{Schafers:1995:TGP,
  author = "L. Schafers and C. Scheidler and O. Kramer-Fuhrmann",
  title = "{TRAPPER}: a graphical programming environment for
    parallel systems",
  journal = j-FUT-GEN-COMP-SYS,
  volume = "11",
  number = "4--5",
  pages = "351--361",
  month = aug,
  year = "1995",
  CODEN = "FGSEVI",
  ISSN = "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L = "0167-739X",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Res. and Technol., Daimler-Benz AG, Berlin,
    Germany",
  classification = "C6110P (Parallel programming); C6110V (Visual
    programming); C6115 (Programming support); C6130B
    (Graphics techniques); C6150G (Diagnostic, testing,
    debugging and evaluating systems); C6150N (Distributed
    systems software)",
  fjournal = "Future Generation Computer Systems",
  journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
  keywords = "Communicating sequential processes; Communication
    loads; Computation loads; Configtool; Designtool;
    Graphical programming environment; Graphical
    representation; High computing power; Hybrid program
    development; Industrial applications; Interprocess
    communication; Mapping; Monitoring system;
    Optimization; Parallel process structure; Parallel
    systems; Software event recording; System design;
    Target hardware configuration; Textual representations;
    TRAPPER; Visualization",
  pubcountry = "Netherlands",
  thesaurus = "Communicating sequential processes; Computer
    animation; Data visualisation; Local area networks;
    Parallel machines; Parallel programming; Programming
    environments; Software tools; System monitoring;
    Transputer systems; Visual programming",
}
@InProceedings{Schuster:1995:CSM,
  author = "G. Schuster and F. Breitenecker",
  title = "Coupling Simulators with the Model Interconnection
    Concept and {PVM}",
  crossref = "Breitenecker:1995:ESC",
  pages = "321--326",
  year = "1995",
  bibdate = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "ARGE Simulation News, Tech. Univ. of Vienna, Austria",
  classification = "C6140D (High level languages); C6150N (Distributed
    systems software); C6185 (Simulation techniques)",
  corpsource = "ARGE Simulation News, Tech. Univ. of Vienna, Austria",
  keywords = "ACSL; communication; continuous simulation; Continuous
    simulation systems; digital simulation; message
    passing; message passing system PVM; Message passing
    system PVM; model interconnection concept; Model
    interconnection concept; MOSIS; Mosis; program; Program
    communication; PVM; simulation languages; systems",
  pubcountry = "Netherlands",
  thesaurus = "Digital simulation; Message passing; Simulation
    languages",
  treatment = "P Practical",
}
@Article{Sekharan:1995:LBM,
  author = "Chandra N. Sekharan and Vineet Goel and R. Sridhar",
  title = "Load balancing methods for ray tracing and binary tree
    computing using {PVM}",
  journal = j-PARALLEL-COMPUTING,
  volume = "21",
  number = "12",
  pages = "1963--1978",
  day = "12",
  month = dec,
  year = "1995",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Fri Aug 06 18:01:04 1999",
  bibsource = "Compendex database;
    http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1995&volume=21&issue=12;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1995&volume=21&issue=12&aid=1028",
  abstract = "We propose efficient load balancing methods for two
    computational problems namely ray tracing and bottom-up
    binary tree computing in a distributed environment. In
    the context of ray tracing, we propose a variant of a
    static load balancing technique presented in [15] where
    the sampling is based on partitioning the object space.
    Our approach partitions the image instead and uses an
    efficient scheduling technique for load balancing.
    Computations carried out on a binary tree arise
    naturally in image processing and network optimization
    problems. Many of these problems are solved efficiently
    in parallel by the popular tree contraction technique
    [1]. In this paper, we explore the tree-contraction
    technique in a distributed setting using the grain
    packing method [9]. Implementations of our algorithms
    on a cluster of workstations using Parallel Virtual
    Machine (PVM) [6] demonstrate near-perfect load
    balancing.",
  acknowledgement = ack-nhfb,
  affiliation = "Loyola Univ of Chicago",
  affiliationaddress = "Chicago, IL, USA",
  classification = "721.1; 722.3; 722.4; 723.1; 723.2; 921.4; C1160
    (Combinatorial mathematics); C4240P (Parallel
    programming and algorithm theory); C6130B (Graphics
    techniques)",
  corpsource = "Dept. of Math. and Comput. Sci., Loyola Univ.,
    Chicago, IL, USA",
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
  journalabr = "Parallel Comput",
  keywords = "Algorithms; Binary tree computing; binary tree
    computing; Computational complexity; Computer graphics;
    Computer networks; Computer workstations; distributed
    algorithms; Distributed computer systems; Distributed
    environment; Grain packing methods; Image processing;
    Load balancing; load balancing; Machine; network
    optimization; Optimization; Parallel processing
    systems; Parallel Virtual; Parallel virtual machine;
    partitioning; PVM; Ray tracing; ray tracing; Resource
    allocation; resource allocation; Scheduling; scheduling
    technique; Tree contraction technique; Trees
    (mathematics); trees (mathematics); Workstation
    cluster",
  pubcountry = "Netherlands",
  treatment = "T Theoretical or Mathematical",
}
@Article{Shen:1995:PSM,
  author = "H. Shen",
  title = "Parallel $k$-set mutual range-join in hypercubes",
  journal = j-MICROPROC-MICROPROG,
  volume = "41",
  number = "7",
  pages = "443--448",
  month = nov,
  year = "1995",
  CODEN = "MMICDT",
  ISSN = "0165-6074 (print), 1878-7061 (electronic)",
  ISSN-L = "0165-6074",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Sch. of Comput. and Inf. Technol., Griffith Univ.,
    Nathan, Qld., Australia",
  classification = "C4230M (Multiprocessor interconnection); C4240P
    (Parallel programming and algorithm theory); C5220P
    (Parallel architecture); C5470 (Performance evaluation
    and testing); C5670 (Network performance)",
  fjournal = "Microprocessing and Microprogramming",
  keywords = "Data comparisons; Hypercubes; Mutual range-join;
    Parallel algorithm; Parallel k-set mutual range-join;
    Performance; Permutation-based range-join; PVM; Tuples;
    Worst case",
  pubcountry = "Netherlands",
  thesaurus = "Hypercube networks; Parallel algorithms; Performance
    evaluation",
}
@InProceedings{Siegelin:1995:BPW,
  author = "C. Siegelin and U. Finger and C. O'Donnell",
  title = "Boosting the performance of workstations through
    {WARPmemory}",
  crossref = "Haridi:1995:EPP",
  pages = "703--706",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. Inf., Ecole Nat. Superieure des Telecommun.,
    Paris, France",
  classification = "C5310 (Storage system design); C5540 (Terminals and
    graphic displays); C5620L (Local area networks); C6110P
    (Parallel programming); C6120 (File organisation);
    C6150N (Distributed systems software)",
  keywords = "Improved workstation performance; Local network;
    Parallel program execution; Performance optimization;
    Physically shared memory; PVM; Running system; Serially
    multiported memory; Standard programming interface;
    WARPmemory; Workstation network",
  thesaurus = "Application program interfaces; Local area networks;
    Memory architecture; Message passing; Parallel
    programming; Shared memory systems; Workstations",
}
@InProceedings{Silva:1995:PCR,
  author = "L. M. Silva and J. G. Silva and S. Chapple and L.
    Clarke",
  title = "Portable checkpointing and recovery",
  crossref = "IEEE:1995:PFI",
  pages = "188--195",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. de Engenharia Inf., Coimbra Univ., Portugal",
  classification = "C6110B (Software engineering techniques); C6150J
    (Operating systems)",
  keywords = "Data-reconfiguration; F CHIMP/MPI; Flexible recovery
    mechanism; Parallel library; Portability; Portable
    checkpointing; Recovery",
  thesaurus = "Operating systems [computers]; Parallel machines;
    Software portability; System recovery",
}
%%% NOTE: this entry describes the same paper as entry Simunovic:1995:MIP
%%% (same crossref, pages, and coauthors).  The citation key, which
%%% carries a misspelling of the first author's surname, is retained so
%%% that existing citations of it still resolve; prefer the other key in
%%% new documents.
@InProceedings{Simmunovic:1995:MIP,
  author = "S. Simunovic and T. Zacharia and N. Baltas and D. B.
    Spalding",
  title = "{MPI} Implementation of {PHOENICS}: a General Purpose
    Computational Fluid Dynamics Code",
  crossref = "Tentner:1995:HPC",
  pages = "122--127",
  year = "1995",
  bibdate = "Wed Aug 14 09:02:28 MDT 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@InProceedings{Simunovic:1995:MIP,
  author = "S. Simunovic and T. Zacharia and N. Baltas and D. B.
    Spalding",
  title = "{MPI} implementation of {PHOENICS}: a general purpose
    computational fluid dynamics code",
  crossref = "Tentner:1995:HPC",
  pages = "122--127",
  year = "1995",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A4725Q (Convection and heat transfer); A4770F
    (Chemically reactive flows); A8240 (Chemical kinetics
    and reactions: special regimes); C5220P (Parallel
    architecture); C5440 (Multiprocessing systems); C6110B
    (Software engineering techniques); C6150G (Diagnostic,
    testing, debugging and evaluating systems); C6150N
    (Distributed systems software); C6185 (Simulation
    techniques); C7320 (Physics and chemistry computing)",
  conftitle = "Proceedings High Performance Computing '95",
  corpsource = "Oak Ridge Nat. Lab., TN, USA",
  keywords = "chemical reactions; chemically reactive flow;
    chemistry computing; computational analysis programs;
    digital simulation; dynamical reaction process
    simulation; EARTH parallel version; flow simulation;
    fluid dynamics; fluid flow simulation; general purpose
    computational fluid dynamics code; heat transfer; heat
    transfer simulation; heterogeneous computer networks;
    high performance computing; Intel Paragon XP/S 35;
    Intel Paragon XP/S 5; Kendall Square Research; large
    scale computational simulations; massively parallel
    supercomputers; message passing; Message Passing
    Interface standard; MPI libraries; multiprocessing
    systems; multiprocessor SGI Onyx computer; parallel
    architectures; parallel machines; PHOENICS; physics
    computing; portable computational tool; program
    testing; scalable performance; software packages;
    software performance evaluation; software portability",
  sponsororg = "SCS",
  treatment = "P Practical",
}
@Article{Sitsky:1995:IPM,
  author = "D. Sitsky and D. Walsh and C. Johnson",
  title = "Implementation and performance of the {MPI} message
    passing interface on the {Fujitsu AP1000}
    multicomputer",
  journal = j-AUSTRALIAN-COMP-SCI-COMM,
  volume = "17",
  number = "1",
  pages = "475--481",
  month = "????",
  year = "1995",
  CODEN = "ACSCDD",
  ISSN = "0157-3055",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Comput. Sci., Australian Nat. Univ.,
    Canberra, ACT, Australia",
  classification = "C6110P (Parallel programming); C6150N (Distributed
    systems software); C6155 (Computer communications
    software)",
  conflocation = "Glenelg, SA, Australia; 1-3 Feb. 1995",
  conftitle = "Eighteenth Australasian Computer Science Conference.
    ACSC'95",
  corpsource = "Dept. of Comput. Sci., Australian Nat. Univ.,
    Canberra, ACT, Australia",
  fjournal = "Australian Computer Science Communications",
  keywords = "application program interfaces; benchmarks;
    Benchmarks; broadcasting; clustered systems; Clustered
    systems; collective routines; Collective routines;
    computer communications software; Fujitsu AP1000
    multicomputer; group-wide broadcast; Group-wide
    broadcast; hardware operations; Hardware operations;
    implementation; Implementation; message passing;
    Message Passing Interface; MPI; multiprocessing
    systems; native calls; Native calls; operating system;
    Operating system; parallel libraries; Parallel
    libraries; parallel programming; performance;
    Performance; portability; Portability; selective
    broadcast operation; Selective broadcast operation;
    software libraries; software performance evaluation",
  pubcountry = "Australia",
  thesaurus = "Application program interfaces; Broadcasting; Computer
    communications software; Message passing;
    Multiprocessing systems; Parallel programming; Software
    libraries; Software performance evaluation",
  treatment = "P Practical",
}
@InProceedings{Sivaraman:1995:PSP,
  author = "H. Sivaraman and C. S. Raghavendra",
  title = "Parallelizing sequential programs to a cluster of
    workstations",
  crossref = "Agrawal:1995:PIW",
  pages = "38--41",
  year = "1995",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation = "Dept. of Electr. Eng. and Comput. Sci., Washington
    State Univ., Pullman, WA, USA",
  classification = "C5440 (Multiprocessing systems); C6115 (Programming
    support); C6150C (Compilers, interpreters and other
    processors)",
  keywords = "ADAPTOR; ADDT; Automatic parallelization system;
    AZTEC; Benchmark programs; Cluster of workstations;
    Data distribution tool; GUI; HPF compiler; Parafrase-2
    parallelizing compiler; PVM; Sequential programs
    parallelisation; Source file",
  thesaurus = "Parallel processing; Parallelising compilers; Software
    tools; Workstations",
}
@Article{Skjellum:1995:EAM,
  author = "Anthony Skjellum and Ewing Lusk and William Gropp",
  title = "Early applications in the {Message-Passing Interface}
    ({MPI})",
  journal = j-IJSAHPC,
  volume = "9",
  number = "2",
  pages = "79--94",
  month = "Summer",
  year = "1995",
  CODEN = "IJSCFG",
  ISSN = "1078-3482",
  bibdate = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource = "Compendex database;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "We describe a number of early efforts to make use of
    the Message-Passing Interface (MPI) standard in
    applications, based on an informal survey conducted in
    May-June, 1994. Rather than a definitive statement of
    all MPI developmental work, this paper addresses the
    initial successes, progress, and impressions that
    application developers have had with MPI, according to
    the responses received. We summarize the important
    aspects of each survey response, and draw conclusions
    about the spread of MPI into applications. An
    understanding of message passing and access to the MPI
    standard are prerequisites for appreciating this paper.
    Some background material is provided to ease this
    requirement.",
  acknowledgement = ack-nhfb,
  affiliation = "Mississippi State Univ",
  affiliationaddress = "Mississippi State, MS, USA",
  classification = "722.2; 722.3; 722.4; 902.2; C6150N (Distributed
    systems software)",
  corpsource = "Dept. of Comput. Sci., Mississippi State Univ., MS,
    USA",
  fjournal = "International Journal of Supercomputer Applications
    and High Performance Computing",
  journalabr = "Int J Supercomput Appl High Perform Comput",
  keywords = "Application developers; application developers;
    Computer hardware; Data communication systems; message
    passing; Message passing interface (MPI);
    Message-Passing Interface; MPI standard; Network
    protocols; software engineering; software standards;
    Standards; Survey; survey; User interfaces",
  thesaurus = "Message passing; Software engineering; Software
    standards",
  treatment = "P Practical",
}
@InProceedings{Skjellum:1995:EMP,
author = "A. Skjellum and N. E. Doss and K. Viswanathan and A.
Chowdappa and P. V. Bangalore",
title = "Extending the message passing interface ({MPI})",
crossref = "IEEE:1995:PSP",
pages = "106--118",
year = "1995",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Mississippi State Univ., MS,
USA",
classification = "C5440 (Multiprocessing systems); C6150N (Distributed
systems software)",
conftitle = "Proceedings Scalable Parallel Libraries Conference",
corpsource = "Dept. of Comput. Sci., Mississippi State Univ., MS,
USA",
keywords = "computer networks; Computer networks; Europe; high
performance computing; High performance computing;
intercommunicator extensions; Intercommunicator
extensions; message passing; message passing interface;
Message passing interface; message passing standard;
Message passing standard; MPI Forum; multicomputers;
Multicomputers; multinational vendors; Multinational
vendors; national laboratories; National laboratories;
processor scheduling; research centers; Research
centers; scheduling; Scheduling; standards; United
States; universities; Universities; workstations;
Workstations",
sponsororg = "Mississippi State Univ.; NSF",
thesaurus = "Computer networks; Message passing; Processor
scheduling; Standards",
treatment = "P Practical",
}
@Article{Smith:1995:CRC,
author = "K. A. Smith and A. J. Baratta and G. E. Robinson",
title = "Coupled {RELAP5} and {CONTAIN} Accident Analysis Using
{PVM}",
journal = j-NUCLEAR-SAFETY,
volume = "36",
number = "1",
pages = "94--108",
month = jan # "--" # jun,
year = "1995",
CODEN = "NUSAAZ",
ISSN = "0029-5604",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "Compendex database;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "This article describes the development of an
integrated accident analysis capability considering
both reactor vessel and containment system responses.
This integrated package, which uses the RELAP5 and
CONTAIN computer codes, provides the user with greater
accuracy and modeling flexibility when compared with
accident analyses using these codes separately.
Multiprocessing, together with message-passing-based
data transfer, enables these concurrent RELAP5 and
CONTAIN calculations. The data transfer facilitates the
coupling between the reactor vessel and containment
portions of the calculation. The Parallel Virtual
Machine software system running on a network of IBM
RISC System\slash 6000 workstations provided the
multiprocessing capabilities required for this work.
The results of an anticipated-transient-without-scram
scenario for a boiling-water reactor nuclear power
plant are provided. For the scenario analyzed, the
containment temperatures and pressures that were
predicted on the basis of the stand-alone codes and
standard analysis methods were lower (i.e., less
conservative) than those predicted with the use of the
integrated code package.",
acknowledgement = ack-nhfb,
affiliation = "Oak Ridge Natl Lab",
affiliationaddress = "Oak Ridge, TN, USA",
classification = "621; 641.1; 723.2; 723.5; 914.1; 921.6; A2841C
(Computer codes for fission reactor theory and design);
A2844 (Fission reactor protection systems, safety and
accidents); A2850G (Light water reactors); C6150N
(Distributed systems software); C7470 (Nuclear
engineering computing)",
corpsource = "Oak Ridge Nat. Lab., TN, USA",
fjournal = "Nuclear safety",
journalabr = "Nucl Saf",
keywords = "anticipated-transient-without-scram scenario; Boiling
water reactors; boiling-water reactor; BWR;
Calculations; capability; Codes (symbols); Computer
aided analysis; computer codes; concurrent
RELAP5/CONTAIN calculations; CONTAIN computer codes;
containment; containment system responses; containment
temperatures; Containment vessels; coupled
RELAP5/CONTAIN accident analysis; Data transfer;
engineering workstations; fission; fission reactor
accidents; fission reactor design; IBM computers; IBM
RISC System/6000; integrated accident analysis;
integrated package; message passing;
message-passing-based data transfer; modeling
flexibility; multiprocessing; multiprocessing
capabilities; multiprocessing programs; nuclear
engineering computing; nuclear power plant; nuclear
power stations; Nuclear reactor accidents; Parallel
processing systems; Parallel Virtual Machine software
system; Parallel virtual machine software system;
Pressure; pressures; reactor containment; reactor
vessel; Reactor vessel and containment system; reduced
instruction set computing; RELAP5 computer codes;
software packages; stand-alone codes; Temperature;
workstations",
treatment = "P Practical; T Theoretical or Mathematical",
}
@InProceedings{Stagg:1995:IPN,
author = "A. K. Stagg and D. D. Cline and G. F. Carey",
title = "Implementing a parabolized {Navier--Stokes} flow
solver on the {Cray T3D}",
crossref = "Bailey:1995:PSS",
pages = "143--148",
year = "1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Jet Propulsion Lab., Cray Res. Inc., Pasadena, CA,
USA",
classification = "C4170 (Differential equations); C4240P (Parallel
programming and algorithm theory); C5440
(Multiprocessing systems); C6150N (Distributed systems
software); C7310 (Mathematics computing)",
keywords = "Cray T3D; Globally addressable memory; Hyperbolic
parabolic system; Interprocessor communication
routines; Large-scale simulation; Library calls;
Massively parallel architectures; Massively parallel
computers; Message passing; Parabolized Navier--Stokes
flow solver; Parallel Virtual Machine; Performance;
Performance results",
thesaurus = "Cray computers; Hyperbolic equations; Mathematics
computing; Message passing; Navier--Stokes equations;
Parabolic equations; Parallel algorithms; Parallel
machines; Software performance evaluation",
}
@InProceedings{Stals:1995:AMP,
author = "L. Stals",
title = "Adaptive multigrid in parallel",
crossref = "Bailey:1995:PSS",
pages = "367--372",
year = "1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Australian Nat. Univ., Canberra, ACT, Australia",
classification = "C4170 (Differential equations); C4185 (Finite
element analysis); C6110P (Parallel programming);
C6150N (Distributed systems software); C7310
(Mathematics computing)",
keywords = "Adaptive multigrid; Adaptive refinement methods; C++;
Elliptic partial differential equations; Finite element
method; Kernighan-Lin method; Load balancing; MIMD
architectures; Multigrid methods; Multigrid programs;
Newest node bisection; Parallel multigrid; Polygonal
region; PVM; Square domains; Structured grids; Uniform
grids; Unstructured grids",
thesaurus = "Elliptic equations; Finite element analysis;
Mathematics computing; Parallel machines; Parallel
programming; Partial differential equations; Resource
allocation",
}
@InProceedings{Stankovski:1995:MPA,
author = "Z. Stankovski",
title = "A Massively Parallel Algorithm for the Collision
Probability Calculations in the {APOLLO-II} Code Using
the {PVM} Library",
crossref = "ANS:1995:MCR",
volume = "2",
pages = "1573--1583",
year = "1995",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "A2820H (Neutron diffusion); A2841C (Computer codes
for fission reactor theory and design); C6110P
(Parallel programming); C7470 (Nuclear engineering
computing)",
corpsource = "Dept. de Mecanique et Technol., Commissariat a
l'Energie Atomique, Gif sur Yvette, France",
keywords = "APOLLO-II code; collision probability; host/node
programmation model; massively parallel algorithm;
message passing; neutron transport; neutron transport
theory; nuclear engineering computing; parallel
algorithms; parallel programming; parallelization; PVM
library",
sponsororg = "ANS; Eur. Nucl. Soc.; Atomic Energy Soc. Japan",
treatment = "T Theoretical or Mathematical",
}
@InProceedings{Stathopoulos:1995:DLB,
author = "A. Stathopoulos and A. Ynnerman",
title = "Dynamic load balancing of atomic structure programs on
a {PVM} cluster",
crossref = "Hertzberger:1995:HPM",
pages = "384--391",
year = "1995",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Vanderbilt Univ., Nashville,
TN, USA",
classification = "C5440 (Multiprocessing systems); C5470 (Performance
evaluation and testing); C6150J (Operating systems)",
corpsource = "Dept. of Comput. Sci., Vanderbilt Univ., Nashville,
TN, USA",
keywords = "atomic data; Atomic data; atomic structure programs;
Atomic structure programs; cluster; dedicated cluster
of workstations; Dedicated cluster of workstations;
dynamic load balancing; Dynamic load balancing; MCHF
package; parallel processing; parallel virtual machine;
Parallel virtual machine; perfect load balancing;
Perfect load balancing; performance evaluation; PVM;
PVM cluster; resource allocation",
pubcountry = "Germany",
thesaurus = "Parallel processing; Performance evaluation; Resource
allocation",
treatment = "A Application; P Practical",
}
@Article{Stellner:1995:CMP,
author = "G. Stellner and M. Schumann and M. Girnghuber",
title = "Comparing message-passing libraries with the {SPY}
analysis environment",
journal = j-IT-IT,
volume = "37",
number = "2",
pages = "46--52",
month = apr,
year = "1995",
CODEN = "ITINEV",
ISSN = "0944-2774",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Inst. f{\"u}r Inf., Tech. Univ. M{\"u}nchen, Germany",
classification = "C5440 (Multiprocessing systems); C6110P (Parallel
programming); C6115 (Programming support)",
fjournal = "Informationstechnik und technische Informatik: IT +
TI",
keywords = "Computational hard problems; Message-passing
libraries; Multi-user environments; Networks of
workstations; NXLib; P4; PVM; SPY analysis environment;
Virtual parallel computer",
language = "German",
pubcountry = "Germany",
thesaurus = "Message passing; Parallel processing; Parallel
programming; Programming environments",
}
@InProceedings{Stubbs:1995:ICE,
author = "S. S. Stubbs and D. L. Carver",
title = "{IPCC++}: a {C++} extension for interprocess
communication with objects",
crossref = "IEEE:1995:PNA",
pages = "205--210",
year = "1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Comput. Sci., Louisiana State Univ., Baton
Rouge, LA, USA",
classification = "C6110J (Object-oriented programming); C6110P
(Parallel programming); C6140D (High level languages);
C6150N (Distributed systems software); C7430 (Computer
engineering)",
keywords = "Asynchronous communication; C++ extension; Concurrency
primitives; Distributed memory models; Dynamic process
creation; Explicit concurrency; Inheritance;
Inter-object concurrency; Interprocess communication
objects; IPCC++; Language model; Object-oriented
programming languages; Orthogonality; Parallel Virtual
Machine; PVM; Selective waiting; Socket-based
application program interface; Static process creation;
Synchronous communication; Typed message passing
system; UNIX interprocess communication system calls
abstraction",
thesaurus = "C language; Distributed memory systems; Inheritance;
Message passing; Object-oriented languages; Parallel
programming; Unix; Virtual machines",
}
@InProceedings{Sunderam:1995:RIH,
author = "V. S. Sunderam",
title = "Recent initiatives in heterogeneous parallel
computing",
crossref = "Gray:1995:PCT",
pages = "1--16",
year = "1995",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
GA, USA",
classification = "C4240P (Parallel programming and algorithm theory);
C5440 (Multiprocessing systems); C5470 (Performance
evaluation and testing); C6110P (Parallel programming);
C6150N (Distributed systems software)",
keywords = "Concurrent computing; Concurrent distributed
computing; Distributed computing; Heterogeneous
parallel computing; Lightweight process; Parallel
virtual machine; Performance enhancement; Performance
evaluation; PVM; Research initiative; Thread",
thesaurus = "Open systems; Parallel processing; Performance
evaluation; Virtual machines",
}
@InProceedings{Suresh:1995:IOP,
author = "H. Suresh",
title = "Implementation of an optimal parallel algorithm for
arithmetic expression parsing",
crossref = "Narashimhan:1995:IIF",
volume = "2",
pages = "925",
year = "1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Fac. of Sci. and Technol., Griffith Univ., Brisbane,
Qld., Australia",
classification = "C4240P (Parallel programming and algorithm theory);
C5230 (Digital arithmetic methods); C6110P (Parallel
programming); C6150C (Compilers, interpreters and other
processors)",
keywords = "Arithmetic expression parsing; Concurrent processing
environment; Optimal parallel algorithm; Parallel
computer architectures; PVM; SIMD parallel
architecture; Simple recursive descent parser",
thesaurus = "Digital arithmetic; Parallel algorithms; Parallelising
compilers",
}
@InProceedings{Suresh:1995:PIQ,
author = "H. Suresh",
title = "{PVM} implementation of quadtree building algorithms
on {SIMD} hypercube system",
crossref = "Narashimhan:1995:IIF",
volume = "2",
pages = "855--858",
year = "1995",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Sch. of Microelectron. Eng., Griffith Univ., Brisbane,
Qld., Australia",
classification = "C1160 (Combinatorial mathematics); C4240P (Parallel
programming and algorithm theory); C5260B (Computer
vision and image processing techniques); C6110P
(Parallel programming); C6120 (File organisation)",
corpsource = "Sch. of Microelectron. Eng., Griffith Univ., Brisbane,
Qld., Australia",
keywords = "algorithms; bilevel; Bilevel images; DOS operating
system; Hierarchical data structures; hierarchical data
structures; hypercube networks; hypercube system; IBM
compatible PCs; image processing; images; Linear
quadtrees; linear quadtrees; parallel algorithms;
Parallel Virtual Machine System; Parallel Virtual
Machine System Software; PVM implementation; Quadtree
building algorithms; quadtree building algorithms;
quadtrees; SIMD; SIMD hypercube system; Single
Instruction Multiple Data hypercube; Single Instruction
Multiple Data hypercube algorithms; Software",
sponsororg = "Parallel Algorithms, Archit. and Software Eng. Res.
Lab.; IEEE; IEEE Comput. Soc.; ACM; Euromicro; IBM;
Instn. Eng. Australia; Inst. Radio and Electron. Eng.
Soc.; Australian Comput. Soc",
thesaurus = "Hypercube networks; Image processing; Parallel
algorithms; Quadtrees",
treatment = "P Practical",
}
@Article{Swanson:1995:PAP,
author = "Eric Swanson and Terry P. Lybrand",
title = "{PVM-AMBER}: a parallel implementation of the {AMBER}
molecular mechanics package for workstation clusters",
journal = j-J-COMPUT-CHEM,
volume = "16",
number = "9",
pages = "1131--1140",
month = sep,
year = "1995",
CODEN = "JCCHDD",
DOI = "https://doi.org/10.1002/jcc.540160907",
ISSN = "0192-8651 (print), 1096-987X (electronic)",
ISSN-L = "0192-8651",
bibdate = "Thu Nov 29 14:54:31 MST 2012",
bibsource = "http://www.interscience.wiley.com/jpages/0192-8651;
http://www.math.utah.edu/pub/tex/bib/jcomputchem1990.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Center for Bioeng., Washington Univ., Seattle, WA,
USA",
classification = "A3620C (Macromolecular conformation (statistics and
dynamics)); A8710 (General, theoretical, and
mathematical biophysics); A8715D (Physical chemistry of
biomolecular solutions; condensed states); A8715H
(Biomolecular dynamics, molecular probes, molecular
pattern recognition); C6150N (Distributed systems
software); C6185 (Simulation techniques); C7320 (Physics
and chemistry computing); C7330 (Biology and medical
computing)",
corpsource = "Center for Bioeng., Washington Univ., Seattle, WA,
USA",
fjournal = "Journal of Computational Chemistry",
journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X",
keywords = "AMBER molecular mechanics package parallel version;
biology computing; biomolecular simulation;
Biomolecular simulation problems; Computational
speedup; computational speedup; computations; Data
exchange; data exchange; digital simulation;
efficiency; Ethernet; FDDI; FDDI connections; free
energy; Free-energy perturbation computations;
free-energy perturbation computations; intermolecular
mechanics; Lipid bilayer systems; lipid bilayer
systems; lipid bilayers; local area; molecular
biophysics; molecular dynamics; Molecular dynamics
computations; molecular dynamics method; networks;
Nonbonded energies; nonbonded energies; Nonbonded
forces; nonbonded forces; Nonbonded pair list
generation; nonbonded pair list generation; packages;
parallel; Parallel efficiency; parallel processing;
peptide; perturbation theory; problems; Processor
synchronization; processor synchronization; Protein;
protein; proteins; PVM message-passing software;
PVM-AMBER; Silicon Graphics; software; solvated;
Solvated peptide; Test simulations; test simulations;
Unix; Unix workstations; Workstation clusters;
workstation clusters; workstations",
onlinedate = "7 Sep 2004",
thesaurus = "Biology computing; Digital simulation; FDDI; Free
energy; Intermolecular mechanics; Lipid bilayers; Local
area networks; Molecular biophysics; Molecular dynamics
method; Parallel processing; Perturbation theory;
Proteins; Software packages; Unix; Workstations",
treatment = "P Practical",
}
@InProceedings{Ten:1995:TPE,
author = "S. V. Ten and V. V. Savchenko and A. A. Pasko",
title = "Time performance evaluation of implicit surface
polygonization on distributed systems",
crossref = "Gray:1995:PCT",
pages = "183--193",
year = "1995",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Distributed Parallel Process. Lab., Aizu Univ.,
Aizu-Wakamatsu City, Japan",
classification = "C4240P (Parallel programming and algorithm theory);
C4260 (Computational geometry); C6130B (Graphics
techniques); C6150N (Distributed systems software);
C7310 (Mathematics computing); C7400 (Engineering
computing)",
keywords = "CAD system; Complex surfaces; Distributed systems;
Functions; Implicit functions; Implicit surface
polygonization; Mathematics; Parallelization; Polygonal
approximation; PVM system; Rendering; Scalable
algorithm; Software algorithm; Solids; Time performance
evaluation; Toroidal architecture; Transputer network;
Visual analysis",
thesaurus = "CAD; Computational geometry; Data visualisation;
Engineering graphics; Functions; Mathematics computing;
Message passing; Parallel algorithms; Parallel
architectures; Rendering [computer graphics]; Software
performance evaluation; Transputer systems",
}
@InProceedings{Tsunekawa:1995:EIE,
author = "H. Tsunekawa",
title = "Effective implementation of {EDEM} workstation cluster
using {PVM}",
crossref = "Pahl:1995:CCB",
pages = "503--508",
year = "1995",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Uhl:1995:AWA,
author = "A. Uhl",
title = "Adapted wavelet analysis on moderate parallel
distributed memory {MIMD} architectures",
crossref = "Ferreira:1995:PAI",
pages = "275--283",
year = "1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Res. Inst. for Softwaretechnol., Salzburg Univ.,
Austria",
classification = "B0230 (Integral transforms); B0290Z (Other numerical
methods); B6120B (Codes); B6140C (Optical information,
image and video signal processing); C1130 (Integral
transforms); C4190 (Other numerical methods); C4240P
(Parallel programming and algorithm theory); C5220P
(Parallel architecture); C5260B (Computer vision and
image processing techniques); C6150N (Distributed
systems software)",
keywords = "Adapted wavelet analysis; Algorithm efficiency;
Decomposition; Image compression; Moderate parallel
distributed memory MIMD architectures; PVM; Subband
based parallelization; Wavelet packet best basis
selection; Workstation cluster",
thesaurus = "Distributed memory systems; Image coding; Parallel
algorithms; Parallel architectures; Wavelet
transforms",
}
@InProceedings{Uhl:1995:PCC,
author = "A. Uhl",
title = "Parallel Compact Coding of Satellite Images with
Wavelet Packets using {PVM}",
crossref = "Prasanna:1995:FIP",
pages = "382--387",
year = "1995",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "B6120B (Codes); B6140C (Optical information, image
and video signal processing); C4190 (Other numerical
methods); C5260B (Computer vision and image processing
techniques); C5440 (Multiprocessing systems); C6110P
(Parallel programming); C6130 (Data handling
techniques); C7460 (Aerospace engineering computing)",
corpsource = "Res. Inst. for Softwaretechnol., Salzburg Univ.,
Austria",
keywords = "aerospace computing; artificial satellites;
compression; data; image; image coding; image
compression methods; parallel; parallel approach;
parallel compact coding; parallel machines; parallel
programming environment; processing; programming; PVM;
quality; satellite data; satellite images; wavelet
packet decomposition; wavelet packet decompositions;
wavelet packets; wavelet transforms",
pubcountry = "India",
treatment = "P Practical",
}
@InProceedings{Uhl:1995:VPW,
author = "A. Uhl",
title = "Vector and parallel wavelet transforms for the
analysis of time-varying signals",
crossref = "Bailey:1995:PSS",
pages = "9--14",
year = "1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "RIST, Salzburg Univ., Austria",
classification = "C1130 (Integral transforms); C4190 (Other numerical
methods); C4240P (Parallel programming and algorithm
theory); C5260 (Digital signal processing); C5440
(Multiprocessing systems); C5620L (Local area
networks); C6110P (Parallel programming); C6150N
(Distributed systems software)",
keywords = "Asynchronous task pool; Continuous wavelet transform;
Convex C3440 Vectorcomputer; Load balancing;
Master-slave programming scheme; Parallel wavelet
transforms; PVM; Speed-up; Time-varying signal
analysis; Timing; Vector wavelet transforms;
Workstation cluster",
thesaurus = "Local area networks; Parallel algorithms; Parallel
programming; Resource allocation; Signal processing;
Time-varying systems; Timing; Vector processor systems;
Wavelet transforms; Workstations",
}
@Article{Vaughan:1995:MPM,
author = "Paula L. Vaughan and Anthony Skjellum and Donna S.
Reese and Fei-Chen Cheng",
title = "Migrating from {PVM} to {MPI}, part {I}: The {Unify}
system",
journal = j-FRONTIERS-MASS-PAR-COMP-CONF-PROC,
pages = "488--495",
month = "????",
year = "1995",
bibdate = "Fri May 24 09:57:40 MDT 1996",
bibsource = "Compendex database;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog number 95TH8024.",
abstract = "A new kind of portability system for modifying the PVM
message passing system to generate the Message Passing
Interface (MPI) standard notation for message passing
is described. The system, known as Unify, is designed
to reduce the effort of learning MPI while providing a
sensible means to make use of MPI libraries and MPI
calls. It also allows the immediate use of MPI-based
parallel libraries in applications.",
acknowledgement = ack-nhfb,
affiliation = "Mississippi State Univ",
affiliationaddress = "Mississippi State, MS, USA",
classification = "721.1; 722.3; 722.4; 723.2; 902.2; 921.6",
conference = "Proceedings of the 5th Symposium on the Frontiers of
Massively Parallel Computation",
fjournal = "Frontiers of Massively Parallel Computation ---
Conference Proceedings",
journalabr = "Front Massively Parallel Comput Conf Proc",
keywords = "Computational linguistics; Computer software
portability; Computer workstations; Data communication
systems; Data handling; Data structures; Database
systems; Interfaces (computer); Mathematical models;
Message passing; Message Passing Interface; Parallel
processing systems; Standards",
meetingaddress = "McLean, VA, USA",
meetingdate = "Feb 6--9 1995",
meetingdate2 = "02/06--09/95",
sponsor = "IEEE Computer Society",
}
@Article{Vincent:1995:HPP,
author = "James J. Vincent and Kenneth M. {Merz Jr.}",
title = "A highly portable parallel implementation of {AMBER4}
using the message passing interface standard",
journal = j-J-COMPUT-CHEM,
volume = "16",
number = "11",
pages = "1420--1427",
month = nov,
year = "1995",
CODEN = "JCCHDD",
DOI = "https://doi.org/10.1002/jcc.540161110",
ISSN = "0192-8651 (print), 1096-987X (electronic)",
ISSN-L = "0192-8651",
bibdate = "Thu Nov 29 14:54:32 MST 2012",
bibsource = "http://www.interscience.wiley.com/jpages/0192-8651;
http://www.math.utah.edu/pub/tex/bib/jcomputchem1990.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Chem., Pennsylvania State Univ., University
Park, PA, USA",
classification = "A3620 (Macromolecules and polymer molecules); A6120J
(Computer simulation of static and dynamic liquid
behaviour); A8715 (Molecular biophysics); C5220P
(Parallel architecture); C7320 (Physics and chemistry
computing)",
corpsource = "Dept. of Chem., Pennsylvania State Univ., University
Park, PA, USA",
fjournal = "Journal of Computational Chemistry",
journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X",
keywords = "AMBER4; Cray C90; Cray T3D; free energy; free-energy
perturbation module Gibbs; Free-energy perturbation
module Gibbs; IBM SP1/SP2; lipid bilayer molecular
dynamics simulation; Lipid bilayer molecular dynamics
simulation; macromolecular modeling package;
Macromolecular modeling package; macromolecules;
message passing; message passing interface standard;
Message passing interface standard; MINMD; molecular
biophysics; molecular dynamics method; molecular
dynamics/minimization module; Molecular
dynamics/minimization module; networked workstations;
Networked workstations; perturbation theory; physics
computing; portable parallel implementation; Portable
parallel implementation",
onlinedate = "7 Sep 2004",
thesaurus = "Free energy; Macromolecules; Message passing;
Molecular biophysics; Molecular dynamics method;
Perturbation theory; Physics computing",
treatment = "T Theoretical or Mathematical",
}
@MastersThesis{Viswanathan:1995:PCM,
author = "Kishore Viswanathan",
title = "A parallel client-server model for distributed
computing",
type = "M.S. thesis",
school = "Department of Computer Science, " # inst-MSU,
address = inst-MSU:adr,
pages = "vii + 79",
year = "1995",
bibdate = "Mon Jan 15 16:53:06 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
alttitle = "Distributed computing. Message Passing Interface Forum
(MPIF) MPI-Forum 1994",
keywords = "Client/server computing; Electronic data processing
--- Distributed processing; Mississippi State
University --- Thesis --- (1995); Parallel programming
(computer science)",
}
@InProceedings{Vlassov:1995:MEP,
author = "V. Vlassov and H. Ahmed and L.-E. Thorelli",
title = "{mEDA-2}: An Extension of {PVM}",
crossref = "Malyshkin:1995:PCT",
pages = "288--293",
year = "1995",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Electrum 204, R. Inst. of Technol., Kista, Sweden",
classification = "C6110P (Parallel programming); C6115 (Programming
support); C6150N (Distributed systems software)",
corpsource = "Electrum 204, R. Inst. of Technol., Kista, Sweden",
keywords = "communication; EDA model; environments; intertask;
Intertask communication; mEDA-2; MEDA-2; message
passing; Message passing; parallel program; Parallel
program termination; parallel programming; parallel
programs; Parallel programs; programming; programming
environments; Programming environments; PVM; shared
memory systems; synchronisation; synchronization;
Synchronization; termination; virtual shared memory;
Virtual shared memory; VSM",
pubcountry = "Germany",
thesaurus = "Message passing; Parallel programming; Programming
environments; Shared memory systems; Synchronisation",
treatment = "P Practical",
}
@InProceedings{Walker:1995:MVB,
author = "D. W. Walker",
title = "An {MPI} version of the {BLACS}",
crossref = "IEEE:1995:PSP",
pages = "129--146",
year = "1995",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
classification = "C4140 (Linear algebra); C6110B (Software engineering
techniques); C6150N (Distributed systems software)",
conftitle = "Proceedings Scalable Parallel Libraries Conference",
corpsource = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
keywords = "Basic Linear Communication Subprograms; BLACS;
blocking; Blocking; functionality; Functionality;
linear algebra; message passing; message passing
standard; Message passing standard; MPI; MPI
communication modes; MPI Linear Algebra Communication
Subprograms; nonblocking communication; Nonblocking
communication; software libraries; standards;
subroutines",
sponsororg = "Mississippi State Univ.; NSF",
thesaurus = "Linear algebra; Message passing; Software libraries;
Standards; Subroutines",
treatment = "P Practical",
}
@TechReport{Walker:1995:RBD,
author = "David W. Walker and Steve W. Otto",
title = "Redistribution of Block-Cyclic Data Distributions
Using {MPI}",
number = "ORNL/TM-12999",
institution = inst-ORNL,
address = inst-ORNL:adr,
pages = "iii + 20",
month = jun,
year = "1995",
bibdate = "Tue Jan 16 08:37:06 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.epm.ornl.gov/~walker/mpi/redistribution.ps.Z",
}
@InProceedings{Wang:1995:PPG,
author = "Cho-Li Wang and V. K. Prasanna and Young Won Lim",
title = "Parallelization of perceptual grouping on distributed
memory machines",
crossref = "Cantoni:1995:CCA",
pages = "323--330",
year = "1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "Dept. of Electr. Eng. Syst., Univ. of Southern
California, Los Angeles, CA, USA",
classification = "B6140C (Optical information, image and video signal
processing); C4240C (Computational complexity); C4240P
(Parallel programming and algorithm theory); C5220P
(Parallel architecture); C5260B (Computer vision and
image processing techniques); C5440 (Multiprocessing
systems); C6110P (Parallel programming)",
keywords = "16 Node Cray T3D; Architecture independent parallel
algorithms; CM-5; Communication startup time;
Communication time; Computation time; Distributed
memory machines; High performance computing platforms;
Line segment extraction; MPI message passing standard;
Perceptual grouping; Processing nodes; Transmission
rate",
thesaurus = "Communication complexity; Computational complexity;
Distributed memory systems; Edge detection; Feature
extraction; Message passing; Parallel algorithms",
}
@Article{Wasniowski:1995:NAP,
author = "R. A. Wasniowski",
title = "Nonlinear adaptive prediction algorithm and its
parallel implementation",
journal = j-INFORMATICA,
volume = "19",
number = "3",
pages = "371--377",
month = sep,
year = "1995",
CODEN = "INFOFF",
ISSN = "0350-5596",
ISSN-L = "0350-5596",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
affiliation = "New Mexico Highlands Univ., Las Vegas, NM, USA",
classification = "C1220 (Simulation, modelling and identification);
C1240 (Adaptive system theory); C4240P (Parallel
programming and algorithm theory); C6110P (Parallel
programming); C6185 (Simulation techniques); C7400
(Engineering computing)",
fjournal = "Informatica (Ljubljana, Slovenia)",
keywords = "Computation times; Computationally-intensive
engineering problems; Cost/performance ratio; Group
method of data handling; Heterogeneous machines; Large
parallel programs; Massively parallel computers;
Nonlinear adaptive prediction algorithm; Parallel
algorithm development; Parallel simulators; PVM;
Software packages; Systems identification; Workstation
networks",
pubcountry = "Slovenia",
thesaurus = "Adaptive estimation; Digital simulation; Engineering
computing; Forecasting theory; Identification; Parallel
algorithms",
}
@TechReport{Werner:1995:UMP,
  author = {J{\"o}rg Werner},
  title = {{{\"U}berblick zum Message-Passing-Interface Standard,
    MPI}. ({German}) [{Overview} of the {Message-Passing
    Interface Standard, MPI}]},
  type = {{Parlab-Mitteilungen}},
  number = {04/95},
  institution = {Technische Universit{\"a}t Chemnitz-Zwickau},
  address = {Chemnitz, Germany},
  pages = {35},
  year = {1995},
  bibdate = {Wed Aug 27 06:21:48 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  language = {German},
}
@InProceedings{West:1995:AVV,
  author = {J. E. West and M. M. Stephens and L. H. Turcotte},
  title = {Adaptation of volume visualization techniques to
    {MIMD} architectures using {MPI}},
  crossref = {IEEE:1995:PSP},
  pages = {147--156},
  year = {1995},
  bibdate = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {DoD High Performance Comput. Center, US Army Eng.
    Waterways Exp. Station, Vicksburg, MS, USA},
  classification = {C4240P (Parallel programming and algorithm theory);
    C5440 (Multiprocessing systems); C6130B (Graphics
    techniques); C6150N (Distributed systems software);
    C7300 (Natural sciences computing)},
  conftitle = {Proceedings Scalable Parallel Libraries Conference},
  corpsource = {DoD High Performance Comput. Center, US Army Eng.
    Waterways Exp. Station, Vicksburg, MS, USA},
  keywords = {data visualisation; distributed memory parallel
    computers; Distributed memory parallel computers;
    distributed memory systems; divide and conquer methods;
    divide-and-conquer approach; Divide-and-conquer
    approach; high resolution volume datasets; High
    resolution volume datasets; interprocessor
    communication; Interprocessor communication; message
    passing; Message Passing Interface; MIMD architectures;
    MPI; nCUBE 2; NCUBE 2; parallel algorithm; Parallel
    algorithm; parallel algorithms; parallel architectures;
    parallel implementation; Parallel implementation;
    parallel machines; rendering (computer graphics); scene
    generation; Scene generation; scientific analysis;
    Scientific analysis; sequential algorithm; Sequential
    algorithm; standards; volume rendering method; Volume
    rendering method; volume visualization techniques;
    Volume visualization techniques},
  sponsororg = {Mississippi State Univ.; NSF},
  thesaurus = {Data visualisation; Distributed memory systems; Divide
    and conquer methods; Message passing; Parallel
    algorithms; Parallel architectures; Parallel machines;
    Rendering [computer graphics]; Standards},
  treatment = {A Application; P Practical; T Theoretical or
    Mathematical},
}
@Article{White:1995:PNP,
author = "S. White and A. {\AA}lund and V. S. Sunderam",
title = "Performance of the {NAS} Parallel Benchmarks on
{PVM-Based} Networks",
journal = j-J-PAR-DIST-COMP,
volume = "26",
number = "1",
pages = "61--71",
day = "1",
month = apr,
year = "1995",
CODEN = "JPDCER",
DOI = "https://doi.org/10.1006/jpdc.1995.1048",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Thu Mar 9 09:18:57 MST 2000",
bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.1995.1048/production;
http://www.idealibrary.com/links/doi/10.1006/jpdc.1995.1048/production/pdf",
acknowledgement = ack-nhfb,
classification = "C5440 (Multiprocessing systems); C5470 (Performance
evaluation and testing); C6100 (Software techniques and
systems); C7320 (Physics and chemistry computing)",
corpsource = "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
GA, USA",
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
keywords = "aerodynamics; computational; computing; concurrent;
Ethernet; FDDI networks; kernel benchmarks; NAS
parallel benchmarks; parallel processing; performance
evaluation; PVM system; PVM-based networks; software
performance evaluation",
treatment = "P Practical",
}
@InProceedings{Xu:1995:IPP,
  author = {H. Xu and T. W. Fisher},
  title = {Improving {PVM} Performance using {ATOMIC} User-Level
    Protocol},
  crossref = {Alnuweiri:1995:PHF},
  pages = {108--117},
  year = {1995},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Yonezawa:1995:IED,
  author = {Naoki Yonezawa and Koichi Wada and Motoko Obata},
  title = {Implementation and evaluation of distributed shared
    data objects on a workstation cluster},
  crossref = {IEEE:1995:IPR},
  pages = {319--322},
  year = {1995},
  bibdate = {Fri May 24 09:58:00 MDT 1996},
  bibsource = {Compendex database;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {IEEE catalog number 95CH35765.},
  abstract = {We are developing a system called KaReN to handle
    distributed shared data objects on workstations that
    are connected by Ethernet. The system supplied users a
    parallel programming environment with virtually shared
    data objects. The KaReN was developed using the message
    passing library PVM (Parallel Virtual Machine) to have
    good portability. To reduce overhead in maintaining
    data coherence, several methods are introduced. The
    request merging is introduced to reduce message
    traffic. The copy transfer messages are also clumped
    when possible. The weak consistency is another
    optimization for eliminating unnecessary coherence
    control message by allowing temporally inconsistent
    state. This paper presents the organization and the
    implementation of KaReN. Several applications have been
    executed for evaluation.},
  acknowledgement = ack-nhfb,
  affiliation = {Univ of Tsukuba},
  affiliationaddress = {Ibaraki, Jpn},
  classification = {722.3; 722.4; 723.1; 723.2; 921.5; C5620L (Local
    area networks); C6110P (Parallel programming); C6115
    (Programming support); C6150N (Distributed systems
    software)},
  conference = {Proceedings of the 1995 IEEE Pacific RIM Conference on
    Communications, Computers, and Signal Processing},
  journalabr = {IEEE Pac RIM Conf Commun Comput Signal Process Proc},
  keywords = {Coherence control message; Computer networks; Computer
    software portability; Computer workstations; Copy
    transfer messages; Data coherence; Data handling; Data
    structures; Distributed computer systems; Distributed
    shared data objects; Ethernet; KaReN; Message passing
    library; Message traffic reduction; Object oriented
    programming; Optimization; Parallel programming
    environment; Parallel virtual machine; Parallel virtual
    machine (PVM); Portability; Subroutines; Virtually
    shared data objects; Weak consistency; Workstation
    cluster},
  meetingaddress = {Victoria, BC, Can},
  meetingdate = {May 17--19 1995},
  meetingdate2 = {05/17--19/95},
  sponsor = {IEEE},
  thesaurus = {Local area networks; Message passing; Network
    operating systems; Parallel programming; Programming
    environments; Software portability; Virtual machines;
    Workstations},
}
@Article{Yong:1995:SOM,
  author = {Dou Yong and Zhou Xingming},
  title = {Super-Object model: implementing shared memory
    programming mode on distributed memory multicomputers},
  journal = j-CHIN-J-COMPUTERS,
  volume = {18},
  number = {7},
  pages = {481--487},
  month = jul,
  year = {1995},
  CODEN = {JIXUDT},
  ISSN = {0254-4164},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Comput. Sci, Univ. of Defence Technol.,
    Changsha, China},
  classification = {C5440 (Multiprocessing systems); C6110P (Parallel
    programming)},
  fjournal = {Chinese Journal of Computers = Chi suan chi hsueh
    pao},
  keywords = {Distributed memory multicomputers; Fortran 77; Global
    address; Implementation; Message passing primitives;
    Oak Ridge PVM; Performance; Prototype system; Run-time
    system; Shared memory parallel programming; Shared
    memory programming mode; Super-Object model; UNIX
    operating system},
  language = {Chinese},
  pubcountry = {China},
  thesaurus = {Distributed memory systems; Message passing; Parallel
    programming},
}
@Article{You:1995:EIM,
  author = {J. You and E. Pissaloux and W. P. Zhu and H. A.
    Cohen},
  title = {Efficient image matching: a hierarchical {Chamfer}
    matching scheme via distributed system},
  journal = j-REAL-TIME-IMAGING,
  volume = {1},
  number = {4},
  pages = {245--259},
  month = oct,
  year = {1995},
  CODEN = {REIMFQ},
  ISSN = {1077-2014},
  ISSN-L = {1077-2014},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Sch. of Comput. and Inf. Sci., South Australia Univ.,
    SA, Australia},
  classification = {B6140C (Optical information, image and video signal
    processing); C5220P (Parallel architecture); C5260B
    (Computer vision and image processing techniques)},
  fjournal = {Real-Time Imaging},
  keywords = {Chamfer matching scheme; Distance transform;
    Distributed system; Dynamic thresholding; Edge points;
    Image matching; Parallel implementation; Parallel
    Virtual Machine; Pyramid},
  pubcountry = {UK},
  thesaurus = {Distributed processing; Image matching; Virtual
    machines},
}
@InProceedings{You:1995:PIM,
  author = {J. You and W. P. Zhu and E. Pissaloux and H. A.
    Cohen},
  title = {Parallel image matching on a distributed system},
  crossref = {Narashimhan:1995:IIF},
  pages = {870--873 (vol. 2)},
  year = {1995},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Sch. of Comput. and Inf. Sci, Univ. of South
    Australia, The Levels, SA, Australia},
  classification = {C4240P (Parallel programming and algorithm theory);
    C5260B (Computer vision and image processing
    techniques); C6110P (Parallel programming)},
  keywords = {Distance transform; Distributed memory multicomputer;
    Distributed system; Heavily iterated computation; Image
    feature extraction; Image feature pixels; Low cost
    heterogeneous PVM network; Message-passing; Object
    recognition; Parallel image matching; Parallel virtual
    machine; Repeated memory access},
  thesaurus = {Feature extraction; Image matching; Message passing;
    Object recognition; Parallel algorithms},
}
@InProceedings{Zareski:1995:EPG,
  author = {D. Zareski and B. Wade and P. Hubbard and P. Shirley},
  title = {Efficient parallel global illumination using density
    estimation},
  crossref = {Uselton:1995:PRS},
  pages = {47--54, 104--105},
  year = {1995},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Program of Comput. Graphics, Cornell Univ., Ithaca,
    NY, USA},
  classification = {C4240P (Parallel programming and algorithm theory);
    C6110P (Parallel programming); C6130B (Graphics
    techniques); C6150N (Distributed systems software)},
  keywords = {Arbitrary nondiffuse surfaces; Density estimation;
    Diffuse inter-reflections; Efficient parallel global
    illumination; Energy transport; Gouraud-shaded
    elements; High geometric complexity environments;
    Interactive walk-throughs; Local area network; Master
    task; Meshing phase; Multicomputer parallel density
    estimation global illumination method; Multiple worker
    tasks; Parallel programs; Parallelization;
    Particle-tracing phase; PVM software package;
    Radiosity; Ray-traced images; Shared file system; Still
    frames; Workstations},
  thesaurus = {Brightness; Density; Lighting; Local area networks;
    Parallel algorithms; Parallel programming; Ray tracing;
    Realistic images; Rendering [computer graphics];
    Workstations},
}
@InProceedings{Zelek:1995:DPP,
  author = {J. S. Zelek},
  title = {Dynamic path planning},
  crossref = {IEEE:1995:IIC},
  pages = {1285--1290 (vol. 2)},
  year = {1995},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Dept. of Electr. Eng., McGill Univ., Montreal, Que.,
    Canada},
  classification = {C1230 (Artificial intelligence); C3390C (Mobile
    robots); C7420 (Control engineering computing)},
  keywords = {Dynamic path planning; Harmonic function; Message
    passing software package; Navigation; Nomad robot;
    Potential field; PVM; SPARC and SGI workstations},
  thesaurus = {Computerised control; Dynamics; Harmonics; Message
    passing; Mobile robots; Navigation; Path planning},
}
@InProceedings{Zhou:1995:FMP,
  author = {H. Zhou and A. Geist},
  title = {Faster Message Passing in {PVM}},
  crossref = {Alnuweiri:1995:PHF},
  pages = {67--73},
  year = {1995},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Zhou:1995:RMR,
author = "Honbo Zhou and Al Geist",
title = "``Receiver Makes Right'' Data Conversion in {PVM}",
crossref = "IEEE:1995:CPI",
pages = "458--464",
year = "1995",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "Compendex database;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "Using a Receiver Makes it Right (RMR) data conversion
technique in PVM significantly improves the
message-passing performance in heterogeneous
environments. The improvements are due to two factors:
(1). RMR reduces the need for conversions in a
heterogeneous environment; (2). At most each message is
converted, only once compared to twice for XDR used in
public version of PVM, and our conversion routines are
streamlined and are several times faster than the XDR
routines. The drawback to RMR is the potential need for
a large number of conversion routines. We demonstrate
that only a small number of routines are required
because many vendors use the IEEE standard for data
representation. Given this fact, RMR may emerge as a
promising technique in distributed computing.",
acknowledgement = ack-nhfb,
affiliation = "Math. Sci. Sect., Oak Ridge Nat. Lab.",
affiliationaddress = "Oak Ridge, TN, USA",
classification = "722.1; 722.3; 722.4; 723.1; 723.2; C5440
(Multiprocessing systems); C6120 (File organisation);
C7430 (Computer engineering)",
conference = "Proceedings of the 1995 IEEE 14th Annual International
Phoenix Conference on Computers and Communications",
corpsource = "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
journalabr = "Conf Proc Int Phoenix Conf on Comput Commun",
keywords = "Buffer storage; Computer software; Computer systems
programming; conversion; Conversion routines; Data
communication systems; data conversion; Data
processing; data structures; Decoding; distributed
computing; Distributed computing; Encoding (symbols);
heterogeneous environments; Heterogeneous environments;
Local area networks; machines; Message passing
performance; Data conversion; message-; Message-passing
performance; parallel machines; Parallel processing
systems; parallel virtual machine; Parallel virtual
machine; Parallel virtual machine (PVM); passing
performance; PVM; Receiver makes it right (RMR) data
conversion; routines; virtual",
meetingaddress = "Scottsdale, AZ, USA",
meetingdate = "Mar 28--31 1995",
meetingdate2 = "03/28--31/95",
thesaurus = "Data conversion; Data structures; Parallel machines;
Virtual machines",
treatment = "P Practical",
}
@Article{Zhu:1995:RTC,
  author = {Miaoliang Zhu and Chunming Wu and Youjun Zhang and Yi
    Jin and Jie Li},
  title = {A real-time and concurrent intelligent robotic system
    based on multi-agent architecture},
  journal = j-HIGH-TECH-LETT,
  volume = {5},
  number = {10},
  pages = {20--24},
  month = oct,
  year = {1995},
  CODEN = {GTONE8},
  ISSN = {1002-0470},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Artificial Intelligence Inst., Zhejiang Univ.,
    Hangzhou, China},
  classification = {C1340D (Discrete control systems); C3390C (Mobile
    robots); C4220 (Automata theory); C5220P (Parallel
    architecture); C6150N (Distributed systems software);
    C6170 (Expert systems); C7420 (Control engineering
    computing); C7430 (Computer engineering)},
  fjournal = {High Technology Letters},
  keywords = {Automata; Autonomous mobile robots; Concurrent
    intelligent robotic system; Discrete event-finite state
    transformation model; Intelligent architecture;
    Multi-agent architecture; Multi-computer coherence
    environment; Parallel virtual machine; Pipeline
    scheduler; PVM; Real-time Multi-Agent System; RMAS;
    ROBIX; Simulation},
  language = {Chinese},
  pubcountry = {China},
  thesaurus = {Cooperative systems; Discrete event systems; Finite
    automata; Intelligent control; Mobile robots; Parallel
    processing; Pipeline processing; Real-time systems;
    Scheduling; Virtual machines},
}
@InProceedings{Zhuang:1995:PRS,
  author = {Xinglai Zhuang and Jianping Zhu},
  title = {Parallelizing a reservoir simulator using {MPI}},
  crossref = {IEEE:1995:PSP},
  pages = {165--174},
  year = {1995},
  bibdate = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {NSF Eng. Res. Center for Comput. Field Simulations,
    Mississippi State Univ., MS, USA},
  classification = {C5440 (Multiprocessing systems); C6150N (Distributed
    systems software); C7340 (Geophysics computing); C7490
    (Computing in other engineering fields)},
  conftitle = {Proceedings Scalable Parallel Libraries Conference},
  corpsource = {NSF Eng. Res. Center for Comput. Field Simulations,
    Mississippi State Univ., MS, USA},
  keywords = {customized communication library; Customized
    communication library; customized communication
    subroutines; Customized communication subroutines;
    digital simulation; geophysics computing; IBM SP1/SP2;
    Intel; Intel iPSC/860; message passing; Message Passing
    Interface; MPI; NX communication library; oil
    technology; parallel architecture; Parallel
    architecture; parallel code performance; Parallel code
    performance; parallel code portability; Parallel code
    portability; parallel computers; Parallel computers;
    parallel programming; performance; Performance;
    reservoir simulator; Reservoir simulator; scalability;
    Scalability; software libraries; standards;
    subroutines; workstation clusters; Workstation
    clusters},
  sponsororg = {Mississippi State Univ.; NSF},
  thesaurus = {Digital simulation; Geophysics computing; Message
    passing; Oil technology; Parallel programming; Software
    libraries; Standards; Subroutines},
  treatment = {A Application; P Practical},
}
@InProceedings{Alt:1996:PIA,
author = "R. Alt and J. L. Lamotte",
title = "Parallel integration across time of initial value
problems using {PVM}",
crossref = "Bode:1996:PVM",
pages = "323--??",
year = "1996",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C4130 (Interpolation and function approximation);
C4170 (Differential equations); C6150N (Distributed
systems software); C4240P (Parallel programming and
algorithm theory); C7310 (Mathematics computing)",
corpsource = "MASI and Institut Blaise Pascal, Paris, France",
keywords = "approximation theory; collocation; Connection Machine
CM5; differential; differential equations; distributed
architectures; divided differences; equation; initial
value; initial value problems; linear system;
mathematics computing; method; nonlinear system;
parallel; parallel algorithm; parallel algorithms;
parallel integration; parallel machines; Picard
iterations; polynomial approximation; problems; virtual
machine",
pubcountry = "Germany",
treatment = "T Theoretical or Mathematical",
}
@Article{Anglano:1996:PMB,
author = "C. Anglano and L. Portinale",
title = "Parallel Model-Based Diagnosis Using {PVM}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1156",
pages = "331--334",
year = "1996",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110P (Parallel programming); C6150N (Distributed
systems software); C1160 (Combinatorial mathematics);
C7440 (Civil and mechanical engineering computing)",
corpsource = "Dipartimento di Inf., Universita' di Torino, Italy",
fjournal = "Lecture Notes in Computer Science",
keywords = "automobiles; car fault diagnosis; computing; fault
diagnosis; identification; mechanical engineering;
message passing; methods; MIMD message passing program;
parallel; parallel backward reachability; parallel
machines; parallel model-based diagnosis; parallel
programs; parallel virtual machine; Petri net model;
Petri nets; programming; reachability analysis; space;
state; state-space; virtual machines",
pubcountry = "Germany",
treatment = "A Application; P Practical",
}
@Article{Anonymous:1996:BRMh,
author = "Anonymous",
title = "Book Review: {{\booktitle{MPI: the complete
reference}}: By Marc Snir, Steve Otto, Steven
Huss-Lederman, David Walker, and Jack Dongarra. MIT
Press, Cambridge, MA. (1996). 336 pages. \$27.50}",
journal = j-COMPUT-MATH-APPL,
volume = "31",
number = "11",
pages = "140--140",
month = jun,
year = "1996",
CODEN = "CMAPDK",
ISSN = "0898-1221 (print), 1873-7668 (electronic)",
ISSN-L = "0898-1221",
bibdate = "Wed Mar 1 21:48:23 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/0898122196873494",
acknowledgement = ack-nhfb,
fjournal = "Computers and Mathematics with Applications",
journal-URL = "http://www.sciencedirect.com/science/journal/08981221",
}
@Misc{Anonymous:1996:IPP,
  author = {Anonymous},
  title = {An Introduction to {PVM} Programming},
  howpublished = {World-Wide Web},
  year = {1996},
  bibdate = {Tue Jan 16 08:17:36 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.epm.ornl.gov/pvm/intro.html},
}
@Misc{Anonymous:1996:PPA,
  author = {Anonymous},
  title = {Porting {PVM} Applications to the {Intel Paragon}},
  howpublished = {World-Wide Web},
  year = {1996},
  bibdate = {Tue Jan 16 08:25:19 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.ccs.ornl.gov/news/guide/xps_pvm.html},
}
@Misc{Anonymous:1996:RP,
  author = {Anonymous},
  title = {Research Program},
  howpublished = {World-Wide Web},
  year = {1996},
  bibdate = {Tue Jan 16 08:26:39 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.epm.ornl.gov/networking/},
}
@InProceedings{Arbenz:1996:MDS,
  author = {P. Arbenz and M. Billeter and P. G{\"u}ntert and P.
    Luginb{\"u}hl and M. Taufer and U. {von Matt}},
  title = {Molecular dynamics simulations on {Cray} clusters
    using the {SCIDDLE-PVM} environment},
  crossref = {Bode:1996:PVM},
  pages = {142--??},
  year = {1996},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification = {A0260 (Numerical approximation and analysis); A0270
    (Computational techniques); A0320 (Classical mechanics
    of discrete systems: general mathematical aspects);
    A6120J (Computer simulation of static and dynamic
    liquid behaviour); A8715H (Biomolecular dynamics,
    molecular probes, molecular pattern recognition);
    C6110P (Parallel programming); C6150N (Distributed
    systems software); C7320 (Physics and chemistry
    computing); C7330 (Biology and medical computing)},
  corpsource = {Inst. of Sci. Comput., Swiss Federal Inst. of
    Technol., Zurich, Switzerland},
  keywords = {acids; asynchronous remote procedure calls; atom
    trajectory computation; biochemistry; biology
    computing; classical mechanics; client-server;
    communication; computer simulation; computing; Cray
    clusters; Cray computers; digital simulation;
    distributed algorithms; energy minimization;
    environment; minimisation; molecular biophysics;
    molecular dynamics method; molecular dynamics
    simulations; Newtonian equations of motion; nucleic;
    OPAL; paradigm; parallelization; physics; primitive;
    proteins; SCIDDLE-PVM; software package; virtual
    machines},
  pubcountry = {Germany},
  treatment = {P Practical},
}
@InProceedings{Arbenz:1996:SRP,
  author = {P. Arbenz and W. Gander and H. P. L{\"u}thi and U.
    {von Matt}},
  title = {{Sciddle} 4.0, or, remote procedure calls in {PVM}},
  crossref = {Liddell:1996:HPC},
  pages = {820--??},
  year = {1996},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification = {C6150N (Distributed systems software)},
  corpsource = {Inst. of Sci. Comput., Swiss Federal Inst. of
    Technol., Zurich, Switzerland},
  keywords = {client process; client-server systems; data transfers;
    explicit; large data sets; message passing; overhead;
    parallel processing; parallelism; processes; remote
    procedure calls; Sciddle 4.0; server; tree structure},
  pubcountry = {Germany},
  treatment = {P Practical},
}
@Article{Attiya:1996:ERS,
  author = {H. Attiya},
  title = {Efficient and Robust Sharing of Memory in
    Message-Passing Systems},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1151},
  pages = {56--??},
  month = {????},
  year = {1996},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Bachem:1996:STH,
author = "A. Bachem and W. Hochst{\"a}ttler and M. Malich",
title = "The Simulated Trading Heuristic for Solving Vehicle
Routing Problems",
journal = j-DISCRETE-APPL-MATH,
volume = "65",
number = "1--3",
institution = "Mathematisches Institut, Universit{\"a}t zu K{\"o}ln",
address = "Weyertal 86-90, 50931 K{\"o}ln, Germany",
pages = "47--72",
month = "????",
year = "1996",
CODEN = "DAMADU",
ISSN = "0166-218X (print), 1872-6771 (electronic)",
ISSN-L = "0166-218X",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
Techreports/ZPR.Koeln.bib",
annote = "We present an improvement heuristic for vehicle
routing problems. The heuristic finds complex customer
interchanges to improve an initial solution. Our
approach is modular, thus it is easily adjusted to
different side constraints such as time windows,
backhauls and a heterogeneous vehicle fleet. The
algorithm is well suited for parallelization. We report
on a parallel implementation of the Simulated Trading
heuristic on a cluster of workstations using PVM. The
computational results obtained with sequential and
parallel Simulated Trading show that our approach is
competitive compared to all heuristics known to the
authors by now.",
crindex = "120k,29,zpr93-139.ps.gz",
fjournal = "Discrete Applied Mathematics",
xxnote = "Check final page number??",
}
@Article{Bader:1996:PPA,
author = "David A. Bader and David R. Helman and Joseph
J{\'a}J{\'a}",
title = "Practical parallel algorithms for personalized
communication and integer sorting",
journal = j-ACM-J-EXP-ALGORITHMICS,
volume = "1",
pages = "3:1--3:??",
month = "????",
year = "1996",
CODEN = "????",
DOI = "https://doi.org/10.1145/235141.235148",
ISSN = "1084-6654",
bibdate = "Mon Oct 6 16:01:58 MDT 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "A fundamental challenge for parallel computing is to
obtain high-level, architecture independent, algorithms
which efficiently execute on general-purpose parallel
machines. With the emergence of message passing
standards such as MPI, it has become easier to design
efficient and portable parallel algorithms by making
use of these communication primitives. While existing
primitives allow an assortment of collective
communication routines, they do not handle an important
communication event when most or all processors have
non-uniformly sized personalized messages to exchange
with each other. We focus in this paper on the
h-relation personalized communication whose efficient
implementation will allow high performance
implementations of a large class of algorithms. While
most previous h-relation algorithms use randomization,
this paper presents a new deterministic approach for
h-relation personalized communication with
asymptotically optimal complexity for $ h > p^2 $. As an
application, we present an efficient algorithm for
stable integer sorting. The algorithms presented in
this paper have been coded in Split-C and run on a
variety of platforms, including the Thinking Machines
CM-5, IBM SP-1 and SP-2, Cray Research T3D, Meiko
Scientific CS-2, and the Intel Paragon. Our
experimental results are consistent with the
theoretical analysis and illustrate the scalability and
efficiency of our algorithms across different
platforms. In fact, they seem to outperform all similar
algorithms known to the authors on these platforms.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Journal of Experimental Algorithmics",
}
@InProceedings{Barak:1996:PPM,
author = "A. Barak and A. Braverman and I. Gilderman and O.
Laden",
title = "Performance of {PVM} with the {MOSIX} preemptive
process migration scheme",
crossref = "IEEE:1996:PSI",
pages = "38--45",
year = "1996",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5620L (Local area networks); C6110P (Parallel
programming); C6115 (Programming support); C6150J
(Operating systems); C6150N (Distributed systems
software)",
corpsource = "Inst. of Comput. Sci., Hebrew Univ., Jerusalem,
Israel",
keywords = "allocation; assignments; communication bound
benchmarks; computing; CPU benchmarks; high
performance; idle workstation use; load-balancing;
local area networks; MOSIX multicomputer operating
system; MOSIX preemptive process migration scheme;
multi-tasking applications; multiprogramming; network
operating; operating systems (computers); parallel
algorithms; parallel computing; parallel programming;
process migration; process migration algorithms;
programming environments; PVM performance; resource;
software performance evaluation; static process
assignment; system utilization; systems; task;
transparent; UNIX; Unix; workstation networks;
workstations",
sponsororg = "IEEE Computer Soc., Israel Chapter",
treatment = "P Practical",
}
@InProceedings{Beguelin:1996:TMD,
  author = {A. Beguelin and V. Sunderam},
  title = {Tools for monitoring, debugging, and programming in
    {PVM}},
  crossref = {Bode:1996:PVM},
  pages = {7--13},
  year = {1996},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification = {C6110P (Parallel programming); C6115 (Programming
    support); C6140D (High level languages); C6150G
    (Diagnostic, testing, debugging and evaluating
    systems)},
  corpsource = {Carnegie Mellon Univ., Pittsburgh, PA, USA},
  keywords = {authoring languages; buffered tracing; data
    visualisation; debugging tools; graphical console; Java
    language; JavaPVM; JPVM; languages; object-oriented;
    ParaGraph visualization tool; parallel programming;
    Parallel Virtual Machine; PGPVM; PIOUS; program
    debugging; program monitoring tools; program tracing;
    programming; PVaniM; PVM; PVMRPC; remote procedure
    style; sampling; software; system monitoring; TCL;
    techniques; tkPVM; tools; virtual machines; XPVM},
  pubcountry = {Germany},
  treatment = {P Practical},
}
@Article{Bernaschi:1996:RHP,
  author =       "Massimo Bernaschi",
  title =        "The requirements of a high performance implementation
                 of {PVM}",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "12",
  number =       "1",
  pages =        "3--11",
  month =        may,
  year =         "1996",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Fri Jul 15 09:06:07 MDT 2005",
  bibsource =    "ftp://ftp.ira.uka.de/bibliography/Parallel/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/0167739X",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440
                 (Multiprocessing systems); C5470 (Performance
                 evaluation and testing); C6150N (Distributed systems
                 software); C7430 (Computer engineering)",
  corpsource =   "IBM Eur. Center for Sci. and Eng. Comput., Rome,
                 Italy",
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
  keywords =     "AIX operating system; de facto standard; distributed
                 computing; high performance implementation; IBM
                 parallel; machine; message; message passing; parallel
                 machines; parallel virtual; passing; performance
                 evaluation; POWER 2 architecture; programming
                 interface; PVM; PVMe; run-time; SP2; system; system
                 support; virtual machines",
  pubcountry =   "Netherlands",
  remark =       "Resource Management in Distributed Systems",
  treatment =    "P Practical",
}
@InProceedings{Bhandarkar:1996:MPM,
  author =       "M. A. Bhandarkar and L. V. Kal{\'e}",
  title =        "{MICE}: a prototype {MPI} implementation in {Converse}
                 environment",
  crossref =     "IEEE:1996:PSM",
  pages =        "26--31",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6115 (Programming
                 support); C6150E (General utility programs); C6150N
                 (Distributed systems software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Dept. of Comput. Sci., Illinois Univ., Urbana, IL,
                 USA",
  keywords =     "Abstract Device Interface; application program
                 interfaces; communication; computations; Converse
                 interoperable parallel programming environment; message
                 managers; message passing; MICE; MPI modules; MPICH;
                 multi-threaded MPI programs; open systems; parallel
                 programming; programming environments; prototype MPI
                 implementation; public-domain MPI implementation; PVM
                 interoperation; thread objects; utility programs",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}
@InProceedings{Blaszczyk:1996:EPI,
  author =       "A. Blaszczyk and C. Trinitis",
  title =        "Experience with {PVM} in an industrial environment",
  crossref =     "Bode:1996:PVM",
  pages =        "174--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "B5110 (Electrostatics); B8300 (Power apparatus and
                 electric machines); C6110P (Parallel programming);
                 C6150N (Distributed systems software); C6155 (Computer
                 communications software); C7410 (Electrical engineering
                 computing); C7430 (Computer engineering)",
  corpsource =   "Asea Brown Boveri AG, Heidelberg, Germany",
  keywords =     "3D; Asea Brown Boveri; CAD; cluster; code; computer
                 communications software; configuration; ease of use;
                 efficiency; electric fields; electrical engineering
                 computing; heterogeneous workstation clusters;
                 high-voltage engineering; high-voltage equipment;
                 industrial environment; multiprocessor machines;
                 parallel code; parallel programming; Parallel Virtual
                 Machine; parallelization; PVM communication software;
                 reliability; simulation; virtual machines",
  pubcountry =   "Germany",
  treatment =    "A Application",
}
@InProceedings{Blum:1996:PIP,
  author =       "J. M. Blum and T. M. Warschko and W. F. Tichy",
  title =        "{PSPVM}: implementing {PVM} on a high-speed
                 interconnect for workstation clusters",
  crossref =     "Bode:1996:PVM",
  pages =        "235--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C5620L (Local area
                 networks); C6150N (Distributed systems software); C6180
                 (User interfaces)",
  corpsource =   "Dept. of Inf., Karlsruhe Univ., Germany",
  keywords =     "25 mus; application speed-up; code compatibility;
                 exchange; latency; local area networks; message;
                 message passing; message transmission; multiprocessing;
                 object-; parallel machines; ParaStation high-speed
                 interconnect; ParaStation user interface; PSPVM; PVM
                 package; systems; throughput; user interfaces; user
                 level communication; user-level socket emulation;
                 workstation clusters; workstations",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
@InProceedings{Bonnet:1996:UPW,
  author =       "C. Bonnet",
  title =        "Using {PVM} in wireless network environments",
  crossref =     "Bode:1996:PVM",
  pages =        "296--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "B6210L (Computer communications); C5470 (Performance
                 evaluation and testing); C5620L (Local area networks);
                 C5670 (Network performance)",
  corpsource =   "Inst. Eurecom, Sophia Antipolis, France",
  keywords =     "message passing model; networked environments;
                 parallel machines; parallel virtual machine;
                 performance evaluation; PVM; virtual machines; wireless
                 LAN; wireless local area network; WLAN",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
@InProceedings{Bouchard:1996:FCS,
  author =       "V. Bouchard and P. Cinquin and L. Desbat",
  title =        "First {Compton} scatter correction in {SPECT} using
                 {PVM}",
  crossref =     "Grangeat:1996:PTI",
  pages =        "109--111",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A8710 (General, theoretical, and mathematical
                 biophysics); A8760K (Nuclear medicine, emission
                 tomography); A8770E (Patient diagnostic methods and
                 instrumentation); B6140C (Optical information, image
                 and video signal processing); B7510B (Radiation and
                 radioactivity applications in biomedicine); C4240P
                 (Parallel programming and algorithm theory); C5260B
                 (Computer vision and image processing techniques);
                 C7330 (Biology and medical computing)",
  corpsource =   "Fac. de Med., TIMC-IMAG, La Tronche, France",
  keywords =     "3D algorithms; Compton effect; computed tomography;
                 diagnostic imaging; first Compton interaction;
                 gamma-ray scattering; Klein-Nishina formula; medical;
                 medical image; modeling; nuclear medicine; parallel
                 algorithms; parallel virtual machine; physical;
                 processing; registered scanner reconstruction; single
                 photon emission; SPECT Compton scatter correction",
  pubcountry =   "France",
  treatment =    "T Theoretical or Mathematical",
}
@InProceedings{Brightwell:1996:DIM,
  author =       "R. Brightwell and L. Shuler",
  title =        "Design and implementation of {MPI} on {Puma} portals",
  crossref =     "IEEE:1996:PSM",
  pages =        "18--25",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440
                 (Multiprocessing systems); C6110B (Software engineering
                 techniques); C6150E (General utility programs); C6150J
                 (Operating systems); C6150N (Distributed systems
                 software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Massively Parallel Comput. Res. Lab., Sandia Nat.
                 Labs., Albuquerque, NM, USA",
  keywords =     "application program interfaces; Argonne National
                 Laboratory/Mississippi State University Message Passing
                 Interface standard implementation; high performance
                 message passing environment; Intel Paragon; Intel
                 TeraFLOPS machine; massively parallel computers;
                 message passing; MPI; MPI collective communication; MPI
                 point-to-point communications; MPI-2 one-sided
                 communications; network operating systems; operating
                 systems (computers); parallel architectures; parallel
                 machines; Puma operating system; Puma portals; software
                 portability; SUNMOS; utility programs",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}
@InProceedings{Bubak:1996:MPP,
  author =       "M. Bubak and W. Funika and J. Mo{\'s}ci{\'n}ski",
  title =        "Monitoring of performance of {PVM} applications on
                 virtual network computer",
  crossref =     "Wasniewski:1996:APC",
  pages =        "147--156",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6110P (Parallel
                 programming); C6150G (Diagnostic, testing, debugging
                 and evaluating systems); C6150N (Distributed systems
                 software)",
  corpsource =   "Inst. of Comput. Sci., AGH, Krakow, Poland",
  keywords =     "computer; data visualisation; metaformat; metrics;
                 monitoring; Pablo-based tool; parallel machines;
                 parallel programming; parallel programs; performance
                 monitoring; PVM applications; SDDF; software
                 performance evaluation; software tools; system;
                 Tape/PVM; toolkit; virtual machines; virtual network;
                 visualization",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
@InProceedings{Bubak:1996:PBP,
  author =       "M. Bubak and W. Funika and J. Mo{\'s}ci{\'n}ski and D.
                 Tasak",
  title =        "Pablo-based performance monitoring tool for {PVM}
                 applications",
  crossref =     "Dongarra:1996:APC",
  pages =        "69--78",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6150G (Diagnostic,
                 testing, debugging and evaluating systems)",
  corpsource =   "Inst. of Comput. Sci., AGH, Krakow, Poland",
  keywords =     "3-D molecular dynamics program; conjugate gradient
                 benchmark; Pablo environment; ParaGraph functions;
                 parallel programming; performance monitoring; PVM
                 applications; software performance evaluation; system
                 monitoring; trace file; XPVM",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
@InProceedings{Bubak:1996:PPM,
  author =       "M. Bubak and W. Funika and J. Mo{\'s}ci{\'n}ski and D.
                 Tasak",
  title =        "{Pablo-Based} Performance Monitoring Tool for {PVM}
                 Applications",
  crossref =     "Dongarra:1996:APC",
  pages =        "69--78",
  year =         "1996",
  bibdate =      "Wed Aug 14 09:02:28 MDT 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@InProceedings{Cavenaghi:1996:UPS,
  author =       "M. A. Cavenaghi and R. Spolon and J. E. M.
                 Perea-Martins and S. G. Domingues and A. {Garcia
                 Neto}",
  title =        "Using {PVM} in the simulation of a hybrid dataflow
                 architecture",
  crossref =     "Bode:1996:PVM",
  pages =        "343--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4240P (Parallel programming and algorithm theory);
                 C5220P (Parallel architecture); C5440 (Multiprocessing
                 systems); C6185 (Simulation techniques); C7430
                 (Computer engineering)",
  corpsource =   "Dept. of Comput. Sci., Sao Paulo State Univ., Brazil",
  keywords =     "data flow computing; digital simulation; hybrid
                 dataflow architecture; interconnection network;
                 machines; message passing; message passing environment;
                 multiplexing; multiprocessor system; optical; optical
                 interconnections; parallel architectures; parallel
                 execution; sequential execution; simulator; system;
                 uniprocessor; virtual; wavelength division; wavelength
                 division multiplexing; WDM techniques",
  pubcountry =   "Germany",
  treatment =    "A Application; P Practical",
}
@Article{Charny:1996:MPV,
  author =       "B. Charny",
  title =        "Matrix partitioning on a virtual shared memory
                 parallel machine",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "7",
  number =       "4",
  pages =        "343--355",
  month =        apr,
  year =         "1996",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/71.494629",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4140 (Linear algebra); C4240P (Parallel programming
                 and algorithm theory); C5220P (Parallel architecture)",
  corpsource =   "Audre Inc., San Diego, CA, USA",
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
  keywords =     "contention-; free partitionings; load-balanced;
                 machines; matrix; matrix decomposition; memory
                 contention; parallel; parallel algorithms; parallel
                 machine; partitioning; performance issues; shared
                 memory systems; virtual shared memory; virtual
                 storage",
  treatment =    "T Theoretical or Mathematical",
}
@Article{Chengqing:1996:WIP,
  author =       "Chengqing Ye and Zhenqian Cui",
  title =        "The ways of improving parallel computing efficiency in
                 {PVM}",
  journal =      j-MINI-MICRO-SYSTEMS,
  volume =       "17",
  number =       "4",
  pages =        "12--16",
  month =        apr,
  year =         "1996",
  CODEN =        "XWJXEH",
  ISSN =         "1000-1220",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6185 (Simulation
                 techniques); C7430 (Computer engineering); C6150J
                 (Operating systems)",
  corpsource =   "State Key Lab. of CAD/CG, Zhejiang Univ., Hangzhou,
                 China",
  fjournal =     "Mini-Micro Systems",
  keywords =     "balancing algorithm; communication overhead; digital
                 simulation; distributed system environment; hosts;
                 load; local area network; message exchange; network
                 partitioning; parallel; parallel computing efficiency;
                 parallel machines; PVM; resource allocation; strategy;
                 virtual machine; virtual machines",
  language =     "Chinese",
  pubcountry =   "China",
  treatment =    "P Practical",
}
@Article{Ciampolini:1996:EPM,
  author =       "A. Ciampolini and C. Stefanelli",
  title =        "Extending {PVM} to a massively parallel architecture",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "12",
  number =       "1",
  pages =        "13--23",
  month =        may,
  year =         "1996",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Fri Jul 15 09:06:07 MDT 2005",
  bibsource =    "ftp://ftp.ira.uka.de/bibliography/Parallel/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/0167739X",
  acknowledgement = ack-nhfb,
  classification = "C1250 (Pattern recognition); C5220P (Parallel
                 architecture); C5260B (Computer vision and image
                 processing techniques); C5440 (Multiprocessing
                 systems); C6115 (Programming support); C7430 (Computer
                 engineering)",
  corpsource =   "Dipartimento di Elettronica, Inf. e Sistemistica,
                 Bologna Univ., Italy",
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
  keywords =     "applications; architecture; computational vision
                 application; computer vision; fine-grained parallel;
                 heterogeneous computing; machines; massively parallel
                 architecture; Meiko Computing Surface; multicomputer;
                 parallel; parallel architectures; parallel machines;
                 programming environment; programming environments;
                 transputer technology; Unix workstations; virtual
                 machines",
  pubcountry =   "Netherlands",
  remark =       "Resource Management in Distributed Systems",
  treatment =    "A Application; P Practical",
}
@InProceedings{Clematis:1996:CEP,
  author =       "A. Clematis and V. Gianuzzi",
  title =        "{CPVM} --- extending {PVM} for consistent
                 checkpointing",
  crossref =     "IEEE:1996:PFE",
  pages =        "67--76",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6115 (Programming
                 support); C6150N (Distributed systems software)",
  corpsource =   "Istituto per la Matematica Applicata, CNR, Genova,
                 Italy",
  keywords =     "concurrency control; consistent checkpointing; CPVM;
                 deadlocks; fault-tolerance; global checkpoint-restart
                 algorithms; job-swapping; migration; nonblocking;
                 parallel programming; Parallel Virtual Machine; PVM;
                 software; software fault; software libraries; software
                 library; software portability; software tools;
                 termination; tolerance",
  treatment =    "P Practical",
}
@InProceedings{Clemencon:1996:THM,
  author =       "C. Cl{\'e}men{\c{c}}on and K. M. Decker and V. R.
                 Deshpande and A. Endo and J. Fritscher and P. A. R.
                 Lorenzo and N. Masuda and A. M{\"u}ller and R. R{\"u}hl
                 and W. Sawyer and B. J. N. Wylie and F. Zimmermann",
  title =        "Tools-supported {HPF} and {MPI} parallelization of the
                 {NAS} parallel benchmarks",
  crossref =     "IEEE:1996:FSS",
  pages =        "309--318",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6115 (Programming
                 support); C6140D (High level languages); C6150C
                 (Compilers, interpreters and other processors); C6150G
                 (Diagnostic, testing, debugging and evaluating
                 systems)",
  conftitle =    "Proceedings of 6th Symposium on the Frontiers of
                 Massively Parallel Computation (Frontiers '96)",
  corpsource =   "Centro Svizzero di Calcolo Sci., Manno, Switzerland",
  keywords =     "Annai tool; code development time; communication
                 libraries; compilers; distributed memory systems;
                 FORTRAN; High Performance Fortran; high-level language;
                 message passing; Message Passing Interface; NAS
                 parallel benchmarks; NEC Cenju-3 distributed-memory
                 parallel processor; parallel benchmark kernels;
                 parallel languages; parallel programming; performance;
                 portable parallel applications; program compilers;
                 program debugging; scalability; scientific
                 applications; sequential languages; software libraries;
                 software performance evaluation; software tools",
  sponsororg =   "IEEE Comput. Soc.; NASA Goddard Space Flight Center;
                 URSA/CESDIS",
  treatment =    "P Practical",
}
@InProceedings{Clement:1996:NPM,
  author =       "Mark J. Clement and Michael R. Steed and Phyllis E.
                 Crandall",
  title =        "Network Performance Modeling for {PVM} Clusters",
  crossref =     "ACM:1996:SCP",
  pages =        "??--??",
  year =         "1996",
  bibdate =      "Mon Mar 23 12:31:18 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.supercomp.org/sc96/proceedings/SC96PROC/CLEMENT/INDEX.HTM",
  acknowledgement = ack-nhfb,
}
@Article{Conforti:1996:PIA,
  author =       "D. Conforti and L. {de Luca} and L. Grandinetti and R.
                 Musmanno",
  title =        "A parallel implementation of automatic differentiation
                 for partially separable functions using {PVM}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "22",
  number =       "5",
  pages =        "643--656",
  day =          "8",
  month =        aug,
  year =         "1996",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Fri Aug 6 10:14:59 MDT 1999",
  bibsource =    "Compendex database;
                 http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1996&volume=22&issue=5;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1996&volume=22&issue=5&aid=1065",
  acknowledgement = ack-nhfb,
  classification = "B0290M (Numerical integration and differentiation);
                 C4160 (Numerical integration and differentiation);
                 C6110P (Parallel programming); C6150N (Distributed
                 systems software)",
  corpsource =   "Dipartimento di Elettronica, Inf. e Sistemistica,
                 Calabria Univ., Italy",
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
  keywords =     "automatic differentiation; differentiation;
                 distributed memory; finite-difference approximation;
                 multiprocessor system; parallel algorithms; parallel
                 implementation; partially separable functions; PVM;
                 substantial speed-up",
  pubcountry =   "Netherlands",
  treatment =    "T Theoretical or Mathematical",
}
@InProceedings{Corbett:1996:OMP,
  author =       "P. Corbett and D. Feitelson and S. Fineberg and Yarsun
                 Hsu and B. Nitzberg and J.-P. Prost and M. Snir and B.
                 Traversat and Parkson Wong",
  title =        "Overview of the {MPI-IO} parallel {I/O} interface",
  crossref =     "Jain:1996:IOP",
  pages =        "127--146",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6120 (File
                 organisation); C6150N (Distributed systems software);
                 C6180 (User interfaces)",
  corpsource =   "IBM Thomas J. Watson Res. Center, Yorktown Heights,
                 NY, USA",
  keywords =     "asynchronous I/O operations; collective interface;
                 data structures; file data partitioning; global data
                 structures; high-level interface; message passing;
                 MPI-IO parallel I/O interface; parallel file systems;
                 parallel machine; parallel programming; portable
                 message passing parallel programs; process memories;
                 storage devices; user interfaces",
  treatment =    "A Application; P Practical",
}
@InProceedings{Cotronis:1996:ECP,
  author =       "J. Y. Cotronis and E. Floros and N. Papazis",
  title =        "Efficient composition of {PVM} programs",
  crossref =     "Liddell:1996:HPC",
  pages =        "919--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4240P (Parallel programming and algorithm theory);
                 C6110P (Parallel programming)",
  corpsource =   "Dept. of Inf., Athens Univ., Greece",
  keywords =     "communication; Distribution of Maximum; parallel
                 programming; process algebra; PVM; PVM programs;
                 terminal process; topologies; tree process
                 communication",
  pubcountry =   "Germany",
  treatment =    "T Theoretical or Mathematical",
}
@InProceedings{Coulaud:1996:EIP,
  author =       "O. Coulaud and E. Dillon",
  title =        "Early implementation of {Para++} with {MPI-2}",
  crossref =     "IEEE:1996:PSM",
  pages =        "95--101",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6150E (General utility programs); C6150N
                 (Distributed systems software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Inst. Nat. de Recherche en Inf. et Autom.,
                 Villers-les-Nancy, France",
  keywords =     "application program interfaces; C language; dynamic
                 process chapter; dynamic process management; early
                 implementation; inter-communicator operations; internal
                 implementation; LAM 6.0; message passing; Message
                 Passing Interface; MPI-2; Para++ 2.0; PVM; software
                 libraries",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}
@InProceedings{Dantas:1996:ILB,
  author =       "M. A. R. Dantas and E. J. Zaluska",
  title =        "Improving load balancing in an {MPI} environment with
                 resource management",
  crossref =     "Liddell:1996:HPC",
  pages =        "959--960",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5620L (Local area networks); C6110P (Parallel
                 programming); C6115 (Programming support); C6150N
                 (Distributed systems software)",
  conftitle =    "High-Performance Computing and Networking.
                 International Conference and Exhibition HPCN Europe
                 1996",
  corpsource =   "Dept. of Electron. and Comput. Sci., Southampton
                 Univ., UK",
  keywords =     "load balancing; local area networks; message passing;
                 Message Passing Interface; MPI environment; parallel
                 programming; process migration; programming
                 environments; resource allocation; resource management
                 facility; workstation clusters; workstations",
  treatment =    "P Practical",
}
@InProceedings{Demaine:1996:FCC,
  author =       "E. Demaine",
  title =        "First class communication in {MPI}",
  crossref =     "IEEE:1996:PSM",
  pages =        "189--194",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6115 (Programming
                 support); C6140D (High level languages); C6150N
                 (Distributed systems software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Dept. of Comput. Sci., Waterloo Univ., Ont., Canada",
  keywords =     "application program interfaces; C; channel creation;
                 communication events; Concurrent ML;
                 concurrent-programming languages; dynamic process
                 creation; Fortran; higher-order concurrency; message
                 passing; Message Passing Interface; message-passing;
                 MPI; Occam; parallel languages; parallel programming;
                 run-time; software libraries; software standards;
                 standard; Standard ML; static model",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}
@InProceedings{Deshpande:1996:MIBa,
  author =       "V. Deshpande and W. Sawyer and D. W. Walker",
  title =        "An {MPI} implementation of the {BLACS}",
  crossref =     "IEEE:1996:PSM",
  pages =        "195--198",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4140 (Linear algebra); C5220P (Parallel
                 architecture); C6110B (Software engineering
                 techniques); C6115 (Programming support); C6150E
                 (General utility programs); C6150N (Distributed systems
                 software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Swiss Center for Sci. Comput., Manno, Switzerland",
  keywords =     "application program interfaces; Basic Linear Algebra
                 Communication Subprograms; BLACS; libraries; matrix
                 algebra; message passing; MPI BLACS implementation; MPI
                 functionality; MPI libraries; parallel architectures;
                 performance; software libraries; software performance
                 evaluation; utility programs",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}
@InProceedings{Deshpande:1996:MIBb,
  author =       "V. Deshpande and W. Sawyer",
  title =        "An {MPI} implementation of the {BLACS}",
  crossref =     "IEEE:1996:ICH",
  pages =        "463--468",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4140 (Linear algebra); C5440 (Multiprocessing
                 systems); C6150N (Distributed systems software); C7310
                 (Mathematics computing)",
  conftitle =    "Proceedings of 3rd International Conference on High
                 Performance Computing (HiPC)",
  corpsource =   "Software Technol. Group, Swiss Center for Sci.
                 Comput., Manno, Switzerland",
  keywords =     "Basic Linear Algebra Communication Subprograms;
                 benchmark; BLACS; factorization; linear algebra;
                 mathematics computing; message passing; Message Passing
                 Interface; MPI implementation; parallel architectures;
                 performance; ScaLAPACK library; software libraries;
                 software packages; software performance evaluation",
  sponsororg =   "IEEE Comput. Soc.; IEEE Comput. Soc. Tech. Committee
                 on Parallel Process.; ACM SIGARCH",
  treatment =    "P Practical",
}
@InProceedings{Dinda:1996:PIA,
  author =       "P. A. Dinda and D. R. O'Hallaron",
  title =        "The performance impact of address relation caching",
  crossref =     "Szymanski:1996:LCR",
  pages =        "213--226",
  year =         "1996",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Sch. of Comput. Sci., Carnegie Mellon Univ.,
                 Pittsburgh, PA, USA",
  classification = "C6110P (Parallel programming); C6150N (Distributed
                 systems software)",
  keywords =     "Address computation; Address relation caching; Cache;
                 Critical path; Data transfer; Deposit model
                 communication; Distributed programming; End-to-end
                 latency; Fine grain analytic model; Memory bandwidth;
                 Message passing; Parallel programming; Performance
                 impact",
  thesaurus =    "Cache storage; Distributed processing; Message
                 passing; Parallel programming",
}
@InProceedings{DiNucci:1996:CDS,
  author =       "D. C. DiNucci",
  title =        "Cooperative Data Sharing: a layered approach to an
                 architecture-independent {Message-Passing Interface}",
  crossref =     "IEEE:1996:PSM",
  pages =        "58--65",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5620L (Local area networks); C6150E (General
                 utility programs); C6150J (Operating systems); C6150N
                 (Distributed systems software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "NASA Ames Res. Center, Moffett Field, CA, USA",
  keywords =     "application development; application program
                 interfaces; architecture-independent message-passing
                 interface; CDS1; CDS2; communication semantics;
                 contiguous data; Cooperative Data Sharing System; local
                 area networks; low-level portable interface; message
                 passing; Message Passing Kernel project; MPI; network
                 operating systems; one-sided communication; operating
                 system kernels; queues; semantics; SGI Power Challenge
                 Array; Solaris; Sun workstation network; utility
                 programs; workstations",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}
@Article{Djordjevic:1996:ICI,
  author =       "G. L. Djordjevic and M. K. Stojcev",
  title =        "An interprocessor communication interface for message
                 passing via shared memory modules --- design and
                 performances",
  journal =      j-COMP-ART-INTELL,
  volume =       "15",
  number =       "1",
  pages =        "1--34",
  month =        "????",
  year =         "1996",
  CODEN =        "CARIDY",
  ISSN =         "0232-0274",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5150 (Other circuits for digital computers); C5250
                 (Microcomputer techniques); C5430 (Microcomputers);
                 C5440 (Multiprocessing systems); C5470 (Performance
                 evaluation and testing); C5610S (System buses)",
  corpsource =   "Fac. of Electron. Eng., Nish, Yugoslavia",
  fjournal =     "Computers and Artificial Intelligence =
                 Vychislitel'nye mashiny i iskusstvennyi intellekt",
  keywords =     "communication bandwidth; communication module;
                 communication throughput; configuration flexibility;
                 data transfer; fully connected n-side pyramid;
                 heterogeneous processors; host computer accelerator;
                 interprocessor communication interface; local memory;
                 message latency; message passing; microcomputers;
                 multi-microcomputer system; multiprocessor
                 interconnection networks; performance evaluation;
                 shared memory bus; shared memory modules; shared memory
                 systems; simulation; single board computers; storage
                 management chips; system buses; system efficiency;
                 system operation; system topology; two-side accessible
                 memory chips",
  treatment =    "P Practical",
}
@Article{Dong:1996:SPL,
  author =       "Dong Li and Xiaoming Li and Binxing Fang",
  title =        "The study on the parallel library based on {MPI}",
  journal =      j-MINI-MICRO-SYSTEMS,
  volume =       "17",
  number =       "12",
  pages =        "17--19",
  year =         "1996",
  CODEN =        "XWJXEH",
  ISSN =         "1000-1220",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6115 (Programming
                 support)",
  corpsource =   "Harbin Inst. of Technol., China",
  fjournal =     "Mini-Micro Systems",
  keywords =     "MPI; parallel library; parallel programming; parallel
                 programming environments; software libraries;
                 workstation network",
  language =     "Chinese",
  treatment =    "P Practical",
}
@Article{Dongarra:1996:MPS,
author = "Jack J. Dongarra and Steve W. Otto and Marc Snir and
David Walker",
title = "A message passing standard for {MPP} and
workstations",
journal = j-CACM,
volume = "39",
number = "7",
pages = "84--90",
month = jul,
year = "1996",
CODEN = "CACMA2",
ISSN = "0001-0782 (print), 1557-7317 (electronic)",
ISSN-L = "0001-0782",
bibdate = "Mon Aug 26 07:42:43 MDT 1996",
bibsource = "Compendex database; http://www.acm.org/pubs/toc/;
http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.acm.org/pubs/toc/Abstracts/cacm/234000.html",
abstract = "The Message Passing Interface (MPI) is a portable
message-passing standard that facilitates development
of parallel applications and libraries. MPI has been
developed over a 12-month period in 1993 to 1994 of
intensive meetings involving more than 80 people from
approximately 40 organizations, mainly from the U.S.
and Europe. Programming in MPI is straightforward and
similar to programming with other message-passing
interfaces.",
acknowledgement = ack-nhfb,
affiliation = "Univ of Tennessee",
affiliationaddress = "Knoxville, TN, USA",
classification = "716.1; 722.2; 722.3; 722.4; 723.1; 902.2",
fjournal = "Communications of the ACM",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J79",
journalabr = "Commun ACM",
keywords = "algorithms; Application programming interface; C
(programming language); Communication library routines;
Computer networks; Computer software; Computer systems
programming; Computer workstations; Concurrency
control; Concurrent programs; Data communication
systems; design; FORTRAN (programming language);
Interfaces (computer); languages; Massively parallel
processing; Message passing interface; Message passing
programs; Message passing standard; Networks of
workstations; Parallel processing systems; Point to
point communications; Program compilers;
standardization; Standards; Subroutines",
subject = "{\bf D.4.4}: Software, OPERATING SYSTEMS,
Communications Management, Message sending. {\bf
D.2.7}: Software, SOFTWARE ENGINEERING, Distribution
and Maintenance, Portability. {\bf D.2.0}: Software,
SOFTWARE ENGINEERING, General, Standards. {\bf D.2.2}:
Software, SOFTWARE ENGINEERING, Tools and Techniques,
Software libraries. {\bf D.3.2}: Software, PROGRAMMING
LANGUAGES, Language Classifications, Concurrent,
distributed, and parallel languages. {\bf D.1.3}:
Software, PROGRAMMING TECHNIQUES, Concurrent
Programming, Parallel programming.",
}
@InProceedings{Dongarra:1996:SRP,
author = "J. J. Dongarra and T. Hey and E. Strohmaier",
title = "Selected results from the {PARKBENCH} benchmark",
crossref = "Bouge:1996:EPP",
volume = "2",
pages = "251--254",
year = "1996",
bibdate = "Sat Mar 22 15:39:54 MST 1997",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classcodes = "C5220P (Parallel architecture); C5470 (Performance
evaluation and testing); C6150G (Diagnostic, testing,
debugging and evaluating systems)",
conflocation = "Lyon, France; 26-29 Aug. 1996",
conftitle = "Proceedings of European Conference on Parallel
Processing EURO-PAR '96",
corpsource = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN,
USA",
keywords = "computer testing; evaluation; hierarchical; MPI;
parallel architectures; parallel benchmarks; PARKBENCH
benchmark; performance; PVM; suite",
treatment = "P Practical",
}
@InProceedings{Ebner:1996:TFP,
author = "R. Ebner and A. Pfaffinger",
title = "Transformation of functional programs into data flow
graphs implemented with {PVM}",
crossref = "Bode:1996:PVM",
pages = "251--??",
year = "1996",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C4190 (Other numerical methods); C4210L (Formal
languages and computational linguistics); C5620L (Local
area networks); C6110P (Parallel programming); C6115
(Programming support); C6120 (File organisation);
C6140D (High level languages); C6150C (Compilers,
interpreters and other processors); C6150N (Distributed
systems software)",
corpsource = "Inst. fur Inf., Tech. Univ. Munchen, Germany",
keywords = "algorithms; automatic coarse-grain program; C
procedure generation; communication; compiler;
compilers; computational linguistics; data flow; data
flow graphs; data structures; distributed tree-like
data structures; dynamic data; FASAN; FASAN schedulers;
function node evaluation; functional; functional
language; functional program transformation; functional
programming; inherent parallelism; languages; local
area networks; maximal; numerical analysis; parallel
programming; parallelising; parallelization; processor
scheduling; PVM library; recursive numerical;
semantics; software libraries; stream flow semantics;
structure; tree; workstation clusters; workstations;
wrapper streams",
pubcountry = "Germany",
treatment = "P Practical",
}
@InProceedings{Fabero:1996:DLB,
author = "J. C. Fabero and I. Martin and A. Bautista and S.
Molina",
title = "Dynamic load balancing in a heterogeneous environment
under {PVM}",
crossref = "IEEE:1996:PFE",
pages = "414--419",
year = "1996",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C4240P (Parallel programming and algorithm theory);
C6110P (Parallel programming); C6115 (Programming
support); C6150N (Distributed systems software)",
corpsource = "Dept. de Inf. y Autom., Univ. Complutense de Madrid,
Spain",
keywords = "allocation; computational load; computational
requirements; computer aided software engineering;
dynamic load balancing; heterogeneous environment;
heterogeneous workstations net; parallel algorithms;
processor scheduling; programming environments;
resource; virtual storage",
treatment = "P Practical",
}
@Article{Fagg:1996:PIP,
author = "Graham Fagg and Jack Dongarra",
title = "{PVMPI}: An Integration of {PVM} and {MPI} Systems",
journal = "Calculateurs Parall{\`e}les",
volume = "8",
number = "2",
pages = "151--166",
year = "1996",
CODEN = "????",
ISSN = "1260-3198",
bibdate = "Tue Feb 26 10:10:44 2002",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.netlib.org/utk/papers/pvmpi/paper.html;
http://www.netlib.org/utk/papers/pvmpi/pvmpi.ps;
http://www.netlib.org/utk/people/JackDongarra/pdf/pvmpi.pdf",
acknowledgement = ack-nhfb,
}
@InProceedings{Fagg:1996:TGR,
author = "G. E. Fagg and K. S. London and J. J. Dongarra",
title = "Taskers and general resource managers: {PVM}
supporting {DCE} process management",
crossref = "Bode:1996:PVM",
pages = "180--??",
year = "1996",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6150E (General utility programs); C6150N
(Distributed systems software); C7430 (Computer
engineering)",
corpsource = "Dept. of Comput. Sci., Tennessee Univ., Knoxville, TN,
USA",
keywords = "allocation schemes; application program interfaces;
DCE process; distributed algorithms; distributed
computing environments; dynamic meta-computing
environments; general resource managers; management;
Message; message passing; MPI; MPIRUN systems;
operations; Parallel Virtual Machine; Passing
Interface; processor scheduling; PVM 3.4 release; PVM
internal; PVMPI project; resource allocation;
schedulers; standardised plug-in; taskers;
user-controlled flexibility; virtual machines",
pubcountry = "Germany",
treatment = "P Practical",
}
@InProceedings{Fang:1996:SPP,
author = "N. Fang and H. Burkhart",
title = "Structured parallel programming using {MPI}",
crossref = "Liddell:1996:HPC",
pages = "840--847",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110P (Parallel programming); C6115 (Programming
support)",
conftitle = "High-Performance Computing and Networking.
International Conference and Exhibition HPCN Europe
1996",
corpsource = "Dept. of Inf., Basel Univ., Switzerland",
keywords = "higher abstractions; higher-level functions; message
passing; message passing interface; message-passing
programs; parallel programming; portability;
programmer-oriented abstractions; programming
environment; programming environments; structured
parallel programming; system-oriented level",
treatment = "P Practical",
}
@InProceedings{Fineberg:1996:PPI,
author = "S. A. Fineberg and P. Wong and B. Nitzberg and C.
Kuszmaul",
title = "{PMPIO} --- a portable implementation of {MPI-IO}",
crossref = "IEEE:1996:FSS",
pages = "188--195",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110B (Software engineering techniques); C6110P
(Parallel programming); C6150J (Operating systems);
C6150N (Distributed systems software)",
conftitle = "Proceedings of 6th Symposium on the Frontiers of
Massively Parallel Computation (Frontiers '96)",
corpsource = "Numerical Aerodynamic Simulation, NASA Ames Res.
Center, Moffett Field, CA, USA",
keywords = "Cray J90; IBM SP-2; input-output programs; Intel
Paragon; message passing; message passing interface;
MPI-IO; parallel programming; PMPIO; portable I/O
interface; portable implementation; portable parallel
Input/Output interface; portable parallel programming;
SGI; software engineering; software portability; Sun
shared memory workstations",
sponsororg = "IEEE Comput. Soc.; NASA Goddard Space Flight Center;
URSA/CESDIS",
treatment = "P Practical",
}
@InProceedings{Foster:1996:CDT,
author = "I. T. Foster and D. R. {Kohr, Jr.} and R. Krishnaiyer
and A. Choudhary",
title = "Communicating data-parallel tasks: an {MPI} library
for {HPF}",
crossref = "IEEE:1996:ICH",
pages = "433--438",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110P (Parallel programming); C6140D (High level
languages)",
conftitle = "Proceedings of 3rd International Conference on High
Performance Computing (HiPC)",
corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
keywords = "data-parallel tasks; FORTRAN; High Performance
Fortran; HPF; HPF compiler; MPI library; multiblock
application; multidisciplinary simulations; parallel
programming; performance; pipeline computations;
software performance evaluation; synthetic
communication benchmark; task parallelism",
sponsororg = "IEEE Comput. Soc.; IEEE Comput. Soc. Tech. Committee
on Parallel Process.; ACM SIGARCH",
treatment = "P Practical",
}
@InProceedings{Foster:1996:DSB,
author = "Ian Foster and David R. {Kohr, Jr.} and Rakesh
Krishnaiyer and Alok Choudhary",
title = "Double Standards: Bringing Task Parallelism to {HPF}
Via the Message Passing Interface",
crossref = "ACM:1996:SCP",
pages = "??--??",
year = "1996",
bibdate = "Mon Mar 23 12:31:18 1998",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.supercomp.org/sc96/proceedings/SC96PROC/FOSTER2/INDEX.HTM",
acknowledgement = ack-nhfb,
}
@InProceedings{Foster:1996:GCM,
author = "I. Foster and C. Kesselman and M. Snir",
title = "Generalized communicators in the {Message Passing
Interface}",
crossref = "IEEE:1996:PSM",
pages = "42--49",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110J (Object-oriented programming); C6110P
(Parallel programming); C6150E (General utility
programs); C6150N (Distributed systems software)",
conftitle = "Proceedings. Second MPI Developer's Conference",
corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
keywords = "application program interfaces; collective
communication operations; dynamic endpoint creation;
dynamically created threads; endpoint transfer;
generalized communicator construct; generalized MPI
communicator concept; message passing; Message Passing
Interface; multiple communication endpoints; multiple
threads; object-oriented programming; object-oriented
applications; parallel programming; utility programs",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed
Process",
treatment = "P Practical",
}
@InProceedings{Foster:1996:MCL,
author = "I. T. Foster and D. R. {Kohr, Jr.} and R.
Krishnaiyer",
title = "{MPI} as a coordination layer for communicating {HPF}
tasks",
crossref = "IEEE:1996:PSM",
pages = "68--78",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110B (Software engineering techniques); C6110P
(Parallel programming); C6115 (Programming support);
C6140D (High level languages); C6150E (General utility
programs); C6150N (Distributed systems software)",
conftitle = "Proceedings. Second MPI Developer's Conference",
corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
keywords = "application kernel; application program interfaces;
binding; communication interface semantics;
communications microbenchmark; coordination library
calls; data parallelism; data-parallel languages;
distributed array; execution model; explicit message
passing; FORTRAN; High Performance Fortran task
communication; high-level operations; libraries;
library; message passing; Message Passing Interface;
MPI coordination layer; parallel languages; parallel
program development; parallel programming; performance
evaluation; prototype HPF/MPI library; sequential
languages; software libraries; software performance
evaluation; task parallelism; utility programs",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed
Process",
treatment = "P Practical",
}
@InProceedings{Foster:1996:MIW,
author = "I. Foster and J. Geisler and S. Tuecke",
title = "{MPI} on the {I-WAY}: a wide-area, multimethod
implementation of the {Message Passing Interface}",
crossref = "IEEE:1996:PSM",
pages = "10--17",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5620W (Other computer networks); C6110B (Software
engineering techniques); C6115 (Programming support);
C6130S (Data security); C6150E (General utility
programs); C6150N (Distributed systems software)",
conftitle = "Proceedings. Second MPI Developer's Conference",
corpsource = "Argonne Nat. Lab., IL, USA",
keywords = "application program interfaces; authentication;
automatic configuration mechanisms; communication
mechanisms; geographically distributed computing
resources; geographically distributed database
resources; geographically distributed graphics
resources; geographically distributed networking;
heterogeneous systems; high-speed wide-area networks;
I-WAY distributed-computing experiment; message
authentication; message passing; Message Passing
Interface; MPICH; Nexus multithreaded runtime system;
parallel programming; portable high-performance
programming model; process creation; programming
environments; software environment; software libraries;
utility programs; wide area networks",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed
Process",
treatment = "P Practical",
}
@InProceedings{Geist:1996:APP,
author = "G. A. Geist",
title = "Advanced programming in {PVM}",
crossref = "Bode:1996:PVM",
pages = "1--??",
year = "1996",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110P (Parallel programming); C6115 (Programming
support); C6150G (Diagnostic, testing, debugging and
evaluating systems); C6150N (Distributed systems
software)",
corpsource = "Oak Ridge Nat. Lab., TN, USA",
keywords = "advanced programming; application performance;
applications; CUMULVS; distributed computing
applications; fault tolerance; interactive; JavaPVM;
message passing; parallel computing; parallel
programming; Parallel Virtual Machine; performance
evaluation; plug-ins; program debugging; PVM; software;
software fault tolerance; software packages; TkPVM;
virtual machines",
pubcountry = "Germany",
treatment = "P Practical",
}
@InProceedings{Geist:1996:MEM,
author = "A. Geist and W. Gropp and S. Huss-Lederman and A.
Lumsdaine and E. Lusk and W. Saphir and T. Skjellum and
M. Snir",
title = "{MPI-2}: extending the {Message-Passing Interface}",
crossref = "Bouge:1996:EPP",
pages = "128--135",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5220P (Parallel architecture); C5610 (Computer
interfaces)",
conftitle = "Proceedings of European Conference on Parallel
Processing EURO-PAR '96",
corpsource = "Oak Ridge Nat. Lab., TN, USA",
keywords = "collective operations; computer interfaces; dynamic
process management; extensions; external interfaces;
language binding; message passing; Message Passing
Interface; MPI; MPI-2; MPI-2 document; one-sided
operations; real-time computing; standards",
treatment = "P Practical",
}
@TechReport{Geist:1996:VDP,
author = "G. A. Geist and James Kohn and Philip Papadopoulos",
title = "Visualization, Debugging, and Performance in {PVM}",
institution = inst-ORNL,
address = inst-ORNL:adr,
pages = "11",
year = "1996",
bibdate = "Tue Jan 16 08:22:10 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.epm.ornl.gov/~geist/CapeCod.ps",
}
@Article{Gennart:1996:CAG,
author = "B. A. Gennart and J. {Tarraga Gimenez} and R. D.
Hersch",
title = "Computer-Assisted Generation of {PVM\slash C++}
Programs Using {CAP}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1156",
pages = "259--269",
month = "????",
year = "1996",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110F (Formal methods); C6110P (Parallel
programming); C6115 (Programming support); C6140D (High
level languages); C6150N (Distributed systems
software)",
corpsource = "Ecole Polytech. Federale de Lausanne, Switzerland",
fjournal = "Lecture Notes in Computer Science",
keywords = "algorithm parallelization; algorithms; automatic
programming; C language; C++; CAP; communication
library; computation description; Computer-Aided
Parallelization; computer-assisted; computer-assisted
C++ program generation; data transfer requirements;
formal specification; language extension; machine;
message exchange; message passing; MPMD program;
object-oriented languages; ordering; parallel; parallel
program writing; parallel programming; performance;
processors; PVM program generation; sequential code;
sequential operation; sequential operations;
specification; synchronisation; synchronization; thread
execution; thread mapping; threads",
pubcountry = "Germany",
treatment = "P Practical",
}
@InProceedings{Ghosh:1996:ELM,
author = "K. Ghosh and S. Breit",
title = "Evaluating the Limits of Message Passing via the
Shared Attraction Memory on {CC-COMA} Machines:
Experiences with {TCGMSG} and {PVM}",
crossref = "ACM:1996:FCP",
pages = "173--180",
year = "1996",
bibdate = "Wed Mar 18 12:33:18 MST 1998",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
annote = "Also known as ICS'96. Held as part of the Federated
computing research conference (FCRC'96)",
keywords = "ACM; architecture; computer; FCRC; ICS; SIGARCH;
supercomputing",
}
@InProceedings{Gold:1996:UAL,
author = "C. Gold and T. Schnekenburger",
title = "Using the {ALDY} load distribution system for {PVM}
applications",
crossref = "Bode:1996:PVM",
pages = "278--??",
year = "1996",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110P (Parallel programming); C6115 (Programming
support); C6150N (Distributed systems software)",
corpsource = "Inst. fur Inf., Tech. Univ. Munchen, Germany",
keywords = "ALDY adaptive load distribution system; ALDY function
library; libraries; load distribution strategies;
parallel application programming; parallel
applications; parallel program processes; parallel
programming; PVM applications; resource allocation;
software",
pubcountry = "Germany",
treatment = "P Practical",
}
@InProceedings{Govindan:1996:OMP,
author = "V. Govindan and Y. Park and X. Li and S. Crear and O.
Johnson",
title = "An overview of a {MPI} profiling environment for the
{NEC Cenju-3}",
crossref = "IEEE:1996:PSM",
pages = "185--188",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6115 (Programming support); C6120 (File
organisation); C6150G (Diagnostic, testing, debugging
and evaluating systems); C6150J (Operating systems);
C6150N (Distributed systems software)",
conftitle = "Proceedings. Second MPI Developer's Conference",
corpsource = "High Performance Comput. Center, Houston Univ., TX,
USA",
keywords = "application program interface; application program
interfaces; data visualisation; dynamic trace buffer
management; message passing; Message Passing Interface;
MPI applications; MPI profiling environment; MPP
research prototype; NEC Cenju-3; NSF Grand Challenge
Application Group; operating system; operating systems
(computers); parallel machines; program diagnostics;
software libraries; storage management; user-driven
visualization; virtual memory; virtual storage;
visualization tool",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed
Process",
treatment = "P Practical",
}
@Article{Gropp:1996:HPM,
author = "W. Gropp and E. Lusk",
title = "A high-performance {MPI} implementation on a
shared-memory vector supercomputer",
journal = j-PARALLEL-COMPUTING,
volume = "22",
number = "11",
pages = "1513--??",
month = "????",
year = "1996",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Wed Mar 18 12:33:29 MST 1998",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Gropp:1996:HPP,
author = "William Gropp and Ewing Lusk and Nathan Doss and
Anthony Skjellum",
title = "High-performance, portable implementation of the {MPI}
{Message Passing Interface Standard}",
journal = j-PARALLEL-COMPUTING,
volume = "22",
number = "6",
pages = "789--828",
day = "20",
month = sep,
year = "1996",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Fri Aug 6 10:15:01 MDT 1999",
bibsource = "Compendex database;
http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1996&volume=22&issue=6;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1996&volume=22&issue=6&aid=1075",
acknowledgement = ack-nhfb,
affiliation = "Argonne Natl Lab",
affiliationaddress = "Argonne, IL, USA",
classification = "722.2; 722.4; 723; 723.1; 723.2; 902.2; C6110B
(Software engineering techniques); C6110P (Parallel
programming); C6115 (Programming support); C6150N
(Distributed systems software)",
corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
journalabr = "Parallel Comput",
keywords = "applications; Computer programming; Computer software
portability; Data communication systems; design goal;
distribution; environments; free; future developments;
high-performance portable implementation; Interfaces
(computer); library writers; message passing; Message
passing interface; MPI message; MPI-2; MPICH; parallel
computer vendors; Parallel processing systems; parallel
programming; Parallel programming environment; passing
interface standard; portable parallel programming
environment; programming; project management; software
libraries; software performance evaluation; software
portability; software standards; software tools;
specialists; specification; standard library;
Standards",
treatment = "P Practical",
}
@InProceedings{Hachler:1996:IAC,
author = "G. Hachler and H. Burkhart",
title = "Implementing the {ALWAN} communication and data
distribution library using {PVM}",
crossref = "Bode:1996:PVM",
pages = "243--250",
year = "1996",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110B (Software engineering techniques); C6110P
(Parallel programming); C6115 (Programming support);
C6140D (High level languages); C6150N (Distributed
systems software)",
corpsource = "Dept. of Inf., Basel Univ., Switzerland",
keywords = "ALWAN communication and data distribution; code
generation; CRAY T3D; environment; IBM SP2; INTEL
PARAGON; language programming; library; measurements;
message passing; mixed-; parallel application
programmability; parallel coordination language;
parallel languages; parallel programming; performance;
performance evaluation; performance portability;
programming environments; PVM; reusability; software;
software component reuse; software libraries; software
portability",
pubcountry = "Germany",
treatment = "P Practical",
}
@Article{Haechler:1996:IAC,
author = "G. Haechler and H. Burkhart",
title = "Implementing the {ALWAN} Communication and Data
Distribution Library Using {PVM}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1156",
pages = "243--250",
month = "????",
year = "1996",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Heckathorn:1996:SSP,
author = "H. Heckathorn and B. Popp and W. Smith and D. Conklin
and D. A. Newman and F. Wieland",
title = "{SSGM}: from serial to parallel processing using
{PVM}",
journal = j-PROC-SPIE,
volume = "2741",
pages = "267--277",
month = "????",
year = "1996",
CODEN = "PSISDG",
ISSN = "0277-786X (print), 1996-756X (electronic)",
ISSN-L = "0277-786X",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C3240K (Image sensors); C6185 (Simulation techniques);
C3360L (Aerospace control); C3375 (Military control
systems); C4260 (Computational geometry); C5220P
(Parallel architecture); C5260B (Computer vision and
image processing techniques); C5440 (Multiprocessing
systems); C6130B (Graphics techniques); C6150N
(Distributed systems software); C6160S (Spatial and
pictorial databases); C7460 (Aerospace engineering
computing)",
conflocation = "Orlando, FL, USA; 9-11 April 1996",
conftitle = "Technologies for Synthetic Environments:
Hardware-in-the-Loop Testing",
corpsource = "Div. of Space Sci., Naval Res. Lab., Washington, DC,
USA",
fjournal = "Proceedings of the SPIE --- The International Society
for Optical Engineering",
keywords = "aerospace computing; aerospace simulation;
computational; computational speed requirements; data
visualisation; databases; geometry; guidance;
hardware-in-; heterogeneous computers; high-fidelity
real-time distributed simulation; high-fidelity scene
generation; image; infrared imaging; IR sensor testing;
latency; message; message passing system; military
computing; military systems; missile; missile defence
simulation; model; optical tracking; optimistic;
optimistic computing; parallel; parallel machines;
parallel processing; parallel virtual machine
programming environment; passing; physics-based
distributed simulation; physics-based phenomenology
models; problems; processing; programming environments;
protocols; radar; radar imaging; realistic images;
rendering (computer graphics); signatures;
surveillance; synchronization; synergistic; synthetic
scene generation; target RCS; target tracking;
technologies; the-loop simulation; tracking; virtual
machines; visual; visualisation",
sponsororg = "SPIE",
treatment = "P Practical",
}
@InProceedings{Hempel:1996:APT,
author = "R. Hempel and F. Zimmermann",
title = "On the automatic {PARMACS-to-MPI} transformation in
application programs",
crossref = "Liddell:1996:HPC",
pages = "1033--1034",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5610 (Computer interfaces); C6150E (General utility
programs); C6150N (Distributed systems software); C6155
(Computer communications software)",
conftitle = "High-Performance Computing and Networking.
International Conference and Exhibition HPCN Europe
1996",
corpsource = "German Nat. Res. Center for Inf. Technol., St.
Augustin, Germany",
keywords = "application program; application program interfaces;
computer interfaces; message passing; message passing
interface; PARMACS; translation tool",
treatment = "P Practical",
}
@InProceedings{Hempel:1996:SMM,
author = "R. Hempel",
title = "The status of the {MPI} message-passing standard and
its relation to {PVM}",
crossref = "Bode:1996:PVM",
pages = "14--21",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110B (Software engineering techniques); C6110P
(Parallel programming); C6150N (Distributed systems
software)",
conftitle = "Parallel Virtual Machine --- EuroPVM '96. Third
European PVM Conference. Proceedings",
corpsource = "Computations and Commun. Res. Labs., NEC Europe Ltd.,
Sankt Augustin, Germany",
keywords = "application program interfaces; de-facto standard;
domain; HPFF; Interface Forum; message passing;
Message-Passing; Message-Passing Interface Forum; MPI
message-passing standard; MPI-1; MPI-2; parallel;
parallel computing; parallel programming; Parallel
Virtual Machine; PARMACS; portability interfaces;
programming; public; public domain; PVM; software
packages; software portability; software standards;
virtual machines",
pubcountry = "Germany",
treatment = "P Practical",
}
@InProceedings{Hong:1996:RDM,
author = "Chul-Eui Hong and Bum-Sik Lee and Gi-Won On and
Dong-Hae Chi",
title = "Replay for debugging {MPI} parallel programs",
crossref = "IEEE:1996:PSM",
pages = "156--160",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110P (Parallel programming); C6150G (Diagnostic,
testing, debugging and evaluating systems); C6150N
(Distributed systems software)",
conftitle = "Proceedings. Second MPI Developer's Conference",
corpsource = "Comput. Div., Electron. and Telecommun. Res. Inst.,
Taejeon, South Korea",
keywords = "application program interfaces; bitonic-merge sort;
blocking message passing events; communication errors;
cyclic debugging; execution replay algorithm; hazards
and race conditions; lexical analyzer; logical time
stamping algorithm; merging; message passing; message
race conditions; MPI parallel program debugging; MPI
standard; nonblocking message passing events;
nondeterministic characteristics; parallel programming;
program debugging; reference execution; reproducible
behavior; software libraries; sorting",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed
Process",
treatment = "P Practical",
}
@Article{Huckle:1996:PIS,
author = "T. Huckle",
title = "{PVM}-Implementation of Sparse Approximate Inverse
Preconditioners for Solving Large Sparse Linear
Equations",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1156",
pages = "166--173",
month = "????",
year = "1996",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C4130 (Interpolation and function approximation);
C4140 (Linear algebra); C6110P (Parallel programming);
C7310 (Mathematics computing)",
corpsource = "Inst. f{\"u}r Inf., Tech. Univ. M{\"u}nchen, Germany",
fjournal = "Lecture Notes in Computer Science",
keywords = "access; algorithms; black-box solver; compressed
sparse column format; computing; fast; Gram--Schmidt
process; Householder matrices; iterative methods;
iterative solution; large sparse linear equations;
least squares approximations; least-; master-slave;
mathematics; matrix columns; matrix inversion; matrix
multiplication; model; nonsymmetric ill-conditioned
matrix; normal equations; parallel; preconditioned
conjugate gradient algorithm; preconditioners; PVM
implementation; QR-decomposition; sparse approximate
inverse; sparse matrices; squares problem; submatrices;
unstructured; virtual machines",
pubcountry = "Germany",
treatment = "P Practical; T Theoretical or Mathematical",
}
@MastersThesis{Jones:1996:LLM,
  author =       "Chris R. Jones",
  title =        "Low latency {MPI} for {Meiko CS/2} and {ATM}
                 clusters",
  type =         "Thesis (M.A.)",
  school =       "Department of Computer Science, University of
                 California, Santa Barbara",
  address =      "Santa Barbara, CA, USA",
  year =         "1996",
  LCCN =         "QA76.27.C2 S25",
  bibdate =      "Fri Feb 04 17:35:04 2000",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Article{Ju:1996:SPT,
  author =       "Jiubin Ju and Yong Wang",
  title =        "Scheduling {PVM} Tasks",
  journal =      j-OPER-SYS-REV,
  volume =       "30",
  number =       "3",
  pages =        "22--31",
  month =        jul,
  year =         "1996",
  CODEN =        "OSRED8",
  ISSN =         "0163-5980 (print), 1943-586X (electronic)",
  ISSN-L =       "0163-5980",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4240P (Parallel programming and algorithm theory);
                 C6110P (Parallel programming); C6150N (Distributed
                 systems software)",
  corpsource =   "Dept. of Comput. Sci., Jilin Univ., Changchun, China",
  fjournal =     "Operating Systems Review",
  keywords =     "dynamically produced subtasks; environment; idle
                 workstations; job; parallel programming; pool tasks;
                 processor scheduling; PVM task scheduling; resource
                 utilization; response time; workstation cluster",
  treatment =    "P Practical",
}
@InProceedings{Juhasz:1996:PIP,
  author =       "Z. Juhasz and D. Crookes",
  title =        "A {PVM} implementation of a portable parallel image
                 processing library",
  crossref =     "Bode:1996:PVM",
  pages =        "188--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "B6140C (Optical information, image and video signal
                 processing); B6150C (Communication switching); B6210L
                 (Computer communications); C5260B (Computer vision and
                 image processing techniques); C5620L (Local area
                 networks); C6110B (Software engineering techniques);
                 C6110P (Parallel programming); C6115 (Programming
                 support); C6150N (Distributed systems software)",
  corpsource =   "Dept. of Inf. Syst., Veszprem Univ., Hungary",
  keywords =     "abstract communications layer; asynchronous transfer
                 mode; ATM network-based workstation clusters;
                 communication; Ethernet; extensibility; high-level
                 transparent; image processing; image processing
                 application development; layered; libraries; local
                 area; message passing; message-passing environment;
                 networks; parallel image processing library; parallel
                 programming; Parallel Virtual Machine; parallelism;
                 performance; portable; programming model; PVM
                 implementation; software; software model; software
                 portability; technologies; virtual machines",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
@InProceedings{Kafura:1996:CCC,
  author =       "D. Kafura and L. Huang",
  title =        "Collective communication and communicators in
                 {mpi++}",
  crossref =     "IEEE:1996:PSM",
  pages =        "79--86",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6120 (File organisation); C6140D (High level
                 languages); C6150E (General utility programs); C6150N
                 (Distributed systems software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Dept. of Comput. Sci., Virginia Polytech. Inst. and
                 State Univ., Blacksburg, VA, USA",
  keywords =     "abstract data types; application program interfaces;
                 attribute caching; C language; C++ language binding;
                 cache storage; class hierarchy; collective
                 communication; collective communicators; collective
                 service; contexts; data structures; groups; Intel
                 Paragon; message passing; MPI; mpi++; mpi++ program;
                 object-oriented languages; parallel algorithm; Sun
                 Sparc workstation; utility programs",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}
@InProceedings{Kale:1996:PMD,
author = "R. P. Kale and M. E. Fleharty and P. M. Alsing",
title = "Parallel molecular dynamics visualization using {MPI}
with {MPE} graphics",
crossref = "IEEE:1996:PSM",
pages = "104--110",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "A6120J (Computer simulation of static and dynamic
liquid behaviour); C6110P (Parallel programming); C6115
(Programming support); C6130B (Graphics techniques);
C6150E (General utility programs); C7320 (Physics and
chemistry computing)",
conftitle = "Proceedings. Second MPI Developer's Conference",
corpsource = "Dept. of Chem. and Nucl. Eng., New Mexico Univ.,
Albuquerque, NM, USA",
keywords = "application program interfaces; atomic interactions;
boundary-value problems; data visualisation; digital
simulation; force decomposition; graphics rendering;
IBM SP1; IBM SP2; infinitely replicated confined
region; irregular geometries; load balancing; message
passing; Message Passing Interface; molecular dynamics
method; MPE graphics; MPI Extensions; OpenGL graphics
library; parallel molecular dynamics visualization;
parallel programming; periodic boundary conditions;
physics computing; portable algorithm; real-time 3D
object manipulation; real-time systems; rendering
(computer graphics); SGI Onyx high-end graphics
computer; sockets; software libraries; software
portability; workstation clusters; X-Windows calls",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed
Process",
treatment = "P Practical",
}
@InProceedings{Katkere:1996:VWI,
author = "A. Katkere and J. Schlenzig and R. Jain",
title = "{VRML-based WWW} interface to {MPI} Video",
crossref = "ACM:1996:SVR",
pages = "25--31, 137",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6130B (Graphics techniques); C6130M (Multimedia);
C6140D (High level languages); C7210 (Information
services and centres)",
conftitle = "Proceedings of 1995 VRML Workshop",
corpsource = "Visual Comput. Lab., California Univ., San Diego, La
Jolla, CA, USA",
keywords = "hypermedia; hypermedia markups; interaction metaphor;
interactive television; interactive video; Internet;
motion information; MPI Video; multiple perspective
video streams; on-the-fly updating; page description
languages; query processing; simulation languages;
standard; three dimensional objects; three dimensional
scenes; video data; virtual reality; Virtual Reality
Modeling Language; VRML; VRML specification; World Wide
Web interface; WWW interface",
sponsororg = "San Diego Supercomput. Center; ACM",
treatment = "P Practical",
}
@InProceedings{Kermarrec:1996:PDS,
  author =       "Y. Kermarrec and L. Pautet",
  title =        "Programming Distributed Systems with Both {Ada} 95 and
                 {PVM}",
  crossref =     "Toussaint:1996:AES",
  pages =        "206--216",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6110P (Parallel
                 programming); C6140D (High level languages); C7430
                 (Computer engineering)",
  corpsource =   "ENST de Bretagne, Brest, France",
  keywords =     "Ada; Ada 95; annex; communication architecture;
                 distributed; distributed system; distributed systems
                 programming; facilities; features; GNAT; low level;
                 parallel; parallel languages; parallel machines;
                 Parallel Virtual Machine; programming; PVM; virtual
                 machines",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
@InProceedings{Kohl:1996:PTF,
  author =       "J. A. Kohl and G. A. Geist",
  title =        "The {PVM} 3.4 Tracing Facility and {XPVM} 1.1",
  crossref =     "El-Rewini:1996:PTN",
  volume =       "1",
  pages =        "290--299",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6110P (Parallel
                 programming); C6150G (Diagnostic, testing, debugging
                 and evaluating systems); C6150N (Distributed systems
                 software)",
  corpsource =   "Div. of Comput. Sci. and Math., Oak Ridge Nat. Lab.,
                 TN, USA",
  keywords =     "buffering; diagnostics; evaluation; event mask;
                 format; graphical user interfaces; heterogeneous
                 environment; library; mechanism; message passing;
                 on-the-fly adjustment; parallel; parallel programming;
                 Parallel Virtual Machine; performance tuning; program;
                 program compilers; program debugging; program execution
                 histories; program monitoring; programming; PVM 3.4;
                 PVM library; run-time; self-defining data;
                 shared-memory multiprocessors; software libraries;
                 software performance; trace; trace event definition;
                 trace events; tracing facility; tracing tool;
                 user-defined custom; virtual machines; workstation
                 clusters; XPVM 1.1",
  sponsororg =   "Univ. Hawaii; Univ. Hawaii College of Bus. Adm",
  treatment =    "P Practical",
}
@InProceedings{Kormicki:1996:PLS,
author = "M. Kormicki and A. Mahmood and B. S. Carlson",
title = "Parallel logic simulation on a network of workstations
using {PVM}",
crossref = "IEEE:1996:EIS",
pages = "2--9",
year = "1996",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "B1130B (Computer-aided circuit analysis and design);
B1265B (Logic circuits); C5210B (Computer-aided logic
design); C5440 (Multiprocessing systems); C7410D
(Electronic engineering computing)",
corpsource = "Washington State Univ., Richland, WA, USA",
keywords = "activity level; ATM; balance; CAD; circuit analysis
computing; combinational circuits; driven logic
simulation algorithm; Ethernet; gate evaluations; high
performance; ISCAS; ISCAS combinational benchmark
circuits; load; logic; logic testing; network of
workstations; output event-; parallel logic simulation;
parallel machines; parallel virtual machine;
performance; PVM; random partitioning; semi-optimistic
scheme; sequential benchmark circuits; sequential
circuits; switched; virtual machines",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Comput.
Architecture; IEEE Comput. Soc. Tech. Committee on
Distributed Process.; IEEE Comput. Soc. Dallas
Chapter",
treatment = "A Application; P Practical",
}
@InProceedings{Kotsis:1996:EEP,
  author =       "G. Kotsis and F. Sukup",
  title =        "Efficiency Evaluation of {PVM 2.X}, {PVM 3.X}, {P4},
                 {EXPRESS} and {LINDA} on a Workstation Cluster Using
                 the {NAS} Parallel Benchmarks",
  crossref =     "Zaky:1996:PDT",
  pages =        "149--171",
  year =         "1996",
  bibdate =      "Wed Aug 14 09:02:28 MDT 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@InProceedings{Krantz:1996:RFP,
  author =       "A. T. Krantz and A. Zadroga and S. E. Chodrow and V.
                 S. Sunderam",
  title =        "An {RPC} facility for {PVM}",
  crossref =     "Liddell:1996:HPC",
  pages =        "798--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6150N (Distributed systems software)",
  corpsource =   "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
                 GA, USA",
  keywords =     "adaptive parallelism; client-server; client-server
                 systems; computing; concurrent computing; distributed
                 applications; failure resilience; heterogeneous
                 environments; message passing; message-;
                 message-passing systems; parallel processing; parallel
                 virtual machine; passing paradigm; processor
                 scheduling; remote procedure call; remote procedure
                 calls; user-transparent load balancing",
  pubcountry =   "Germany",
  treatment =    "T Theoretical or Mathematical",
}
@InProceedings{Krone:1996:ICF,
  author =       "O. Krone and M. Aguilar and B. Hirsbrunner and V.
                 Sunderam",
  title =        "Integrating Coordination Features in {PVM}",
  crossref =     "Ciancarini:1996:CLM",
  pages =        "432--435",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4240P (Parallel programming and algorithm theory);
                 C6110P (Parallel programming); C6150N (Distributed
                 systems software)",
  corpsource =   "Inst. d'Inf., Fribourg Univ., Switzerland",
  keywords =     "client/server; coordination; extended coordination;
                 features; generative communication; message passing;
                 parallel programming; parallel systems; programming;
                 PVM",
  pubcountry =   "Germany",
  treatment =    "T Theoretical or Mathematical",
}
@Article{Lawton:1996:BHP,
author = "J. V. Lawton and J. J. Brosnan and M. P. Doyle and S.
D. O. Riordain and T. G. Reddin",
title = "Building a high-performance message-passing system for
{MEMORY CHANNEL} clusters",
journal = j-DEC-TECH-J,
volume = "8",
number = "2",
pages = "96--116",
month = oct,
year = "1996",
CODEN = "DTJOEL",
ISSN = "0898-901X",
bibdate = "Thu Mar 20 18:15:43 MST 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.digital.com:80/DTJM08/DTJM08P8.PS",
abstract = "The new MEMORY CHANNEL for PCI cluster interconnect
technology developed by Digital (based on technology
from Encore Computer Corporation) dramatically reduces
the overhead involved in intermachine communication.
Digital has designed a software system, the TruCluster
MEMORY CHANNEL Software version 1.4 product, that
provides fast user-level access to the MEMORY CHANNEL
network and can be used to implement a form of
distributed shared memory. Using this product, Digital
has built a low-level message-passing system that
reduces the communications latency in a MEMORY CHANNEL
cluster to less than 10 microseconds. This system can,
in turn, be used to easily build the communications
libraries that programmers use to parallelize
scientific codes. Digital has demonstrated the
successful use of this message-passing system by
developing implementations of two of the most popular
of these libraries, Parallel Virtual Machine (PVM) and
Message Passing Interface (MPI).",
acknowledgement = ack-nhfb,
classification = "C5220P (Parallel architecture); C6120 (File
organisation); C6150N (Distributed systems software)",
fjournal = "Digital Technical Journal",
keywords = "access; clusters; communications latency;
communications libraries; Computer Corporation;
distributed shared memory; Encore; high-performance
message-passing system; intermachine communication;
Machine; MEMORY CHANNEL; message passing; Message
Passing Interface; Parallel Virtual; PCI cluster
interconnect technology; scientific codes; software;
storage management; system; TruCluster MEMORY CHANNEL
Software; user-level",
treatment = "P Practical",
}
@Article{Lee:1996:TSF,
  author =       "Bu-Sung Lee and A. Heng and W. Cai and Tai-Ann Tan",
  title =        "Task scheduling facility for {PVM}",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "6",
  number =       "4",
  pages =        "563--574",
  month =        dec,
  year =         "1996",
  CODEN =        "PPLTEE",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  bibdate =      "Tue Oct 21 18:27:39 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6110B (Software
                 engineering techniques); C6150N (Distributed systems
                 software)",
  corpsource =   "Sch. of Appl. Sci., Nanyang Technol. Univ.,
                 Singapore",
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
  keywords =     "centralized task scheduler; client server system;
                 client-server systems; design issue; heterogeneous
                 computer systems; library routines; load balancing;
                 parallel machines; Parallel Virtual Machine; PVM;
                 resource allocation; round-robin task allocation
                 scheme; scheduling; software libraries; software
                 portability; task scheduling; virtual machines; virtual
                 metacomputer; workstations",
  pubcountry =   "Singapore",
  treatment =    "P Practical",
}
@InProceedings{Liang:1996:AEO,
author = "Wen-Yew Liang and Chun-Ta King and Feipei Lai",
title = "{Adsmith}: an efficient object-based distributed
shared memory system on {PVM}",
crossref = "Li:1996:SIS",
pages = "??--??",
year = "1996",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C5220P (Parallel architecture); C5440
(Multiprocessing systems); C5470 (Performance
evaluation and testing); C6110J (Object-oriented
programming)",
corpsource = "Dept. of Comput. Sci. and Inf. Eng., Nat. Taiwan
Univ., Taipei, Taiwan",
keywords = "accesses; Adsmith; atomic operations; communication
subsystem; consistency; distributed memory systems;
distributed shared memory system; load/store-like
memory accesses; memory; memory systems; nonblocking;
object-oriented programming; parallel architectures;
performance; performance evaluation; PVM; release
memory; shared; shared objects",
sponsororg = "Chinese Nat. Res. Center for Intelligent Comput.
Syst.; IEEE Comput. Soc.; IEEE Comput. Soc. Tech.
Committee on Parallel Process.; Steering Committee of
the Chinese Nat. Hi-Tech Programme; Inf. Process. Soc.
Japan; Chinese Comput. Federation; IEICE Inf. and Syst.
Soc",
treatment = "P Practical",
}
@InProceedings{Liu:1996:BMP,
  author =       "L. T. Liu and D. E. Culler and C. Yoshikawa",
  title =        "Benchmarking message passing performance using {MPI}",
  crossref =     "Reeves:1996:PIC",
  volume =       "1",
  pages =        "101--110",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440
                 (Multiprocessing systems); C5470 (Performance
                 evaluation and testing); C6150N (Distributed systems
                 software)",
  conftitle =    "Proceedings of 25th International Conference on
                 Parallel Processing",
  corpsource =   "Comput. Sci. Div., Berkeley Univ., CA, USA",
  keywords =     "benchmarks; IBM SP2; Intel Paragon; message passing;
                 message passing performance; microbenchmarks; MPI;
                 parallel machines; performance evaluation; SGI Power
                 Challenge",
  sponsororg =   "Int. Assoc. Comput. and Commun.; Pennsylvania State
                 Univ",
  treatment =    "P Practical",
}
@InProceedings{Loos:1996:MPS,
  author =       "T. Loos and R. Bramley",
  title =        "{MPI} performance on the {SGI Power Challenge}",
  crossref =     "IEEE:1996:PSM",
  pages =        "203--206",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6150E (General
                 utility programs); C6150N (Distributed systems
                 software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  keywords =     "application program interfaces; collective
                 inter-processor communication; communications
                 efficiency; communications overhead; communications
                 tests; cost function; double precision arrays; graph
                 partitioning algorithm; memory copying; memory
                 performance curves; memory tests; message passing; MPI
                 performance; MPI performance curves; MPI standard;
                 parallel algorithms; parallel computers; performance
                 evaluation; point-to-point inter-processor
                 communication; primitives; second level cache; SGI
                 Power Challenge; shared memory multiprocessor; shared
                 memory systems; software performance evaluation;
                 synchronisation; synchronization; total message sizes;
                 utility programs",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}
@InProceedings{Lu:1996:PIF,
author = "E. J.-L. Lu and D. I. Okunbor",
title = "Parallel implementation of {3D FMA} using {MPI}",
crossref = "IEEE:1996:PSM",
pages = "119--124",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "A0270 (Computational techniques); A0320 (Classical
mechanics of discrete systems: general mathematical
aspects); A0545 (Theory and models of chaotic systems);
A9510C (Celestial mechanics); A9575P (Mathematical and
computer techniques in astronomy); C4240C
(Computational complexity); C4240P (Parallel
programming and algorithm theory); C6110P (Parallel
programming); C6150E (General utility programs); C7330
(Biology and medical computing); C6150N (Distributed
systems software); C7320 (Physics and chemistry
computing); C7350 (Astronomy and astrophysics
computing)",
conftitle = "Proceedings. Second MPI Developer's Conference",
corpsource = "Dept. of Comput. Sci., Missouri Univ., Rolla, MO,
USA",
keywords = "3D fast multipole algorithm; application program
interfaces; astronomy computing; astrophysics;
biochemistry; biology computing; biomolecular dynamics;
biophysics; chaos; chaotic characteristics; chemistry
computing; communication back-end; communication
overhead; computational complexity; digital simulation;
galactic system; load balancing; long-range force
calculation; message passing; Message Passing
Interface; MPI; N-body problems; N-body systems
simulation; parallel algorithms; parallel
implementation; partitioning technique; physics
computing; portable scalable parallel library; resource
allocation; time complexity",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed
Process",
treatment = "P Practical; T Theoretical or Mathematical",
}
@InProceedings{Manis:1996:EPT,
  author =       "G. Manis and C. Voliotis and P. Tsanakas and G.
                 Papakonstantinou",
  title =        "Enhancing {PVM} with threads in distributed
                 programming",
  crossref =     "Liddell:1996:HPC",
  pages =        "1013--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6150N (Distributed
                 systems software)",
  corpsource =   "Athens Nat. Tech. Univ., Greece",
  keywords =     "distributed programming; environment; Orchid; parallel
                 programming; platform; portable features; PVM;
                 software; software portability; thread-oriented PVM;
                 threads",
  pubcountry =   "Germany",
  treatment =    "G General Review; P Practical",
}
@InProceedings{Markus:1996:PEM,
  author =       "S. Markus and S. B. Kim and K. Pantazopoulos and A. L.
                 Ocken and E. N. Houstis and P. Wu and S. Weerawarana
                 and D. Maharry",
  title =        "Performance evaluation of {MPI} implementations and
                 {MPI} based {Parallel ELLPACK} solvers",
  crossref =     "IEEE:1996:PSM",
  pages =        "162--169",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4140 (Linear algebra); C4170 (Differential
                 equations); C4185 (Finite element analysis); C6150N
                 (Distributed systems software); C7310 (Mathematics
                 computing)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Dept. of Comput. Sci., Purdue Univ., West Lafayette,
                 IN, USA",
  keywords =     "application program interfaces; distributed memory
                 architectures; domain decomposition; elliptic boundary
                 value problems; elliptic equations; finite difference
                 methods; finite element mesh generation; iterative
                 solvers; ITPACK library; mathematics computing; mesh
                 generation; mesh partitioning; message passing; message
                 passing communication libraries; MIMD; MPI; Parallel
                 ELLPACK; parallel mesh generator; partial differential
                 equations; problem solving environment; PVM; second
                 order elliptic partial differential equations; software
                 libraries; software performance evaluation; sparse
                 algebraic equations; sparse matrices; workstation
                 clusters",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical; T Theoretical or Mathematical",
}
@InProceedings{Martin:1996:WTW,
author = "D. E. Martin and T. J. McBrayer and P. A. Wilsey",
editor = "H. El-Rewini and B. D. Shriver",
booktitle = "{Proceedings of the Twenty-Ninth Hawaii International
Conference on System Sciences}",
title = "{WARPED}: a time warp simulation kernel for analysis
and application development",
volume = "1",
publisher = "IEEE Computer Society Press",
address = "Los Alamitos, CA, USA",
pages = "5--??",
year = "1996",
ISBN = "0-8186-7324-9",
ISBN-13 = "978-0-8186-7324-5",
LCCN = "????",
bibdate = "Sun Apr 13 12:29:32 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "WARPED is a publicly-available time warp simulation
kernel for experimentation and application development.
The kernel defines a standard interface to the
application developer and is designed to provide a
highly configurable environment for the integration of
time warp optimizations. It is written in C++, uses the
MPI (Message Passing Interface) standard and shared
memory for communication, and executes on a variety of
platforms including a network of SUN workstations, a
SUN SMP workstation, the IBM SP1/SP2 multiprocessors,
the Intel Paragon and IBM-compatible PCs running Linux.
WARPED is distributed with several applications and
includes a sequential kernel implementation for
comparative analysis. The kernel supports LP (logical
process) clustering, various time warp algorithms and
several optimizations that dynamically adjust
simulation parameters.",
acknowledgement = ack-nhfb,
classification = "C6185 (Simulation techniques); C6115 (Programming
support); C6150N (Distributed systems software); C6110P
(Parallel programming)",
conflocation = "Wailea, HI, USA; 3-6 Jan. 1996",
conftitle = "Proceedings of HICSS-29: 29th Hawaii International
Conference on System Sciences",
corpsource = "Dept. of ECECS, Cincinnati, OH, USA",
keywords = "analysis; application; application program interfaces;
applications; C++; comparative; configurable
environment; development; development systems; discrete
event simulation; dynamic simulation; IBM compatible;
IBM SP1/SP2 multiprocessors; Intel Paragon; Linux;
logical process clustering; memory systems; message
passing; Message Passing Interface; microcomputer; MPI
standard; optimisation; optimizations; parallel
algorithms; parameter adjustment; PCs; sequential
kernel implementation; shared; shared memory; SUN SMP
workstation; SUN workstation network; synchronisation;
time warp; time warp simulation; time warp simulation
kernel; WARPED",
sponsororg = "Univ. Hawaii; Univ. Hawaii College of Bus. Adm",
treatment = "P Practical",
}
@InProceedings{McCandless:1996:OOM,
author = "B. C. McCandless and J. M. Squyres and A. Lumsdaine",
title = "Object Oriented {MPI} ({OOMPI}): a class library for
the {Message Passing Interface}",
crossref = "IEEE:1996:PSM",
pages = "87--94",
year = "1996",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110F (Formal methods); C6140D (High level
languages); C6110J (Object-oriented programming);
C6110P (Parallel programming); C6115 (Programming
support); C6150E (General utility programs)",
conftitle = "Proceedings. Second MPI Developer's Conference",
corpsource = "Dept. of Comput. Sci. and Eng., Notre Dame Univ., IN,
USA",
keywords = "application program interfaces; C language; C++
bindings; C++ class library; formal specification;
generic specification; message passing; Message Passing
Interface; object-oriented class library;
object-oriented languages; Object-Oriented MPI;
object-oriented programming; OOMPI; parallel
programming; program description language; software
libraries",
sponsororg = "IEEE Comput. Soc. Tech. Committee on Distributed
Process",
treatment = "P Practical",
}
@InProceedings{McDonald:1996:NNP,
  author =       "K. McDonald",
  title =        "The {NAG Numerical PVM Library}",
  crossref =     "Dongarra:1996:APC",
  pages =        "419--428",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C4100 (Numerical analysis); C4240P (Parallel
                 programming and algorithm theory); C5220P (Parallel
                 architecture); C6110P (Parallel programming); C6115
                 (Programming support); C6150N (Distributed systems
                 software)",
  corpsource =   "Numerical Algorithms Group Ltd., Oxford, UK",
  keywords =     "analysis; distributed memory systems;
                 distributed-memory; efficient software; general-purpose
                 numerical library; machines; message passing; NAG
                 Fortran 77 Library; NAG Numerical PVM Library;
                 numerical; Numerical Algorithms Group; parallel
                 algorithms; parallel programming; parallel software;
                 portable; public-domain message-passing; ScaLAPACK
                 project; scalar computers; shared-memory computers;
                 software; software libraries; software portability;
                 state-of-the-art; systems; vector computers",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
@InProceedings{McMahon:1996:EEE,
  author =       "T. P. McMahon and A. Skjellum",
  title =        "{eMPI\slash eMPICH}: embedding {MPI}",
  crossref =     "IEEE:1996:PSM",
  pages =        "180--184",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110B (Software engineering techniques); C6115
                 (Programming support); C6150N (Distributed systems
                 software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Dept. of Comput. Sci., Mississippi State Univ., MS,
                 USA",
  keywords =     "application program interface; application program
                 interfaces; bottom-up design; design paradigms;
                 embeddable libraries; embeddable MPI versions; eMPI;
                 eMPICH; memory-constrained systems; message passing;
                 real-time systems; software libraries; systems
                 analysis; top-down design",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}
@InProceedings{Menden:1996:PPP,
author = "J. Menden and G. Stellner",
title = "Proving properties of {PVM} applications --- a case
study with {CoCheck}",
crossref = "Bode:1996:PVM",
pages = "134--??",
year = "1996",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C4240P (Parallel programming and algorithm theory);
C6110F (Formal methods); C6110P (Parallel programming);
C6150G (Diagnostic, testing, debugging and evaluating
systems); C6150N (Distributed systems software); C7430
(Computer engineering)",
corpsource = "Inst. f{\"u}r Inf., Tech. Univ. M{\"u}nchen, Germany",
keywords = "case study; checkpoint; CoCheck; creation; distributed
algorithms; formal method; machines; parallel
applications; parallel programming; Parallel Virtual
Machine; program; programming theory; properties;
proving; PVM applications; systems software;
verification; virtual; workstation clusters",
pubcountry = "Germany",
treatment = "P Practical; T Theoretical or Mathematical",
}
@Article{Miei:1996:IER,
  author          = "T. Miei and N. Takahashi",
  title           = "Implementation and evaluation of a replay-based
                    debugger for {PVM} programs",
  journal         = j-TRANS-INFO-PROCESSING-SOC-JAPAN,
  volume          = "37",
  number          = "7",
  pages           = "1308--1319",
  month           = jul,
  year            = "1996",
  CODEN           = "JSGRD5",
  ISSN            = "0387-5806",
  ISSN-L          = "0387-5806",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C6110P (Parallel programming); C6115 (Programming
                    support); C6150G (Diagnostic, testing, debugging and
                    evaluating systems); C6150N (Distributed systems
                    software)",
  fjournal        = "Transactions of the Information Processing Society of
                    Japan",
  keywords        = "code; dbxR; demand-driven replay method; dynamic
                    execution sequences; message passing; message passing
                    communications; message passing library;
                    nondeterministic execution behavior; parallel
                    programming; parallel programs; program debugging;
                    program debugging evaluation; PVM programs;
                    replay-based debugger; shared memory systems;
                    shared-memory parallel programs; software performance
                    evaluation; static source",
  language        = "Japanese",
  pubcountry      = "Japan",
  treatment       = "P Practical",
}
@Article{Miguel:1996:APN,
  author          = "Jose Miguel and Agustin Arruabarrena and Ramon Beivide
                    and Jose Angel Gregorio",
  title           = "Assessing the performance of the new {IBM SP2}
                    communication subsystem",
  journal         = j-IEEE-PAR-DIST-TECH,
  volume          = "4",
  number          = "4",
  pages           = "12--22",
  month           = "Winter",
  year            = "1996",
  CODEN           = "IPDTEX",
  DOI             = "https://doi.org/10.1109/88.544433",
  ISSN            = "1063-6552 (print), 1558-1861 (electronic)",
  ISSN-L          = "1063-6552",
  bibdate         = "Fri Apr 11 07:24:28 MDT 1997",
  bibsource       = "Compendex database;
                    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation     = "Universidad del Pais Vasco",
  affiliationaddress = "Spain",
  classification  = "716.1; 721.1; 722.2; 722.4; 723; 912.3; C5440
                    (Multiprocessing systems); C5470 (Performance
                    evaluation and testing); C6150N (Distributed systems
                    software)",
  corpsource      = "Dept. of Comput. Archit. and Technol., Univ. del Pais
                    Vasco, San Sebastian, Spain",
  fjournal        = "IEEE parallel and distributed technology: systems and
                    applications",
  journalabr      = "IEEE Parallel Distrib Technol",
  keywords        = "Bandwidth; basic; collective communication;
                    Communication channels (information theory);
                    communication tests; Computer software; Computer
                    testing; computers; distributed memory systems;
                    evaluation; execution; execution time; Execution times;
                    fault tolerant computing; high performance switch; IBM;
                    IBM SP2 communication subsystem; Interconnection
                    networks; interface adapters; Interfaces (computer);
                    latency; Memory latency; message; message passing;
                    Message passing interface (MPI); Microprocessor chips;
                    MPI message passing library; parallel applications;
                    parallel computer; parallel machines; Parallel
                    processing systems; Parallel virtual machine (pvm);
                    performance; performance assessment; performance
                    indicators; real applications; reliability; SP2;
                    Switching; Synchronization; Systems analysis; tests;
                    throughput; times",
  treatment       = "P Practical",
}
@InProceedings{Mo:1996:IOP,
  author          = "J. Mo and F. Romelfanger and R. J. Hanisch and D.
                    Redding and S. Sirlin and A. Boden",
  title           = "Implementation of an optical prescription retrieval
                    code using {PVM} (parallel virtual machine) in a mixed
                    architecture network",
  crossref        = "Jacoby:1996:ADA",
  pages           = "100--103",
  year            = "1996",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "A9575P (Mathematical and computer techniques in
                    astronomy); C5440 (Multiprocessing systems); C6150N
                    (Distributed systems software); C7350 (Astronomy and
                    astrophysics computing); C7430 (Computer engineering)",
  corpsource      = "Space Telescope Sci. Inst., Baltimore, MD, USA",
  keywords        = "astronomy computing; machine; mixed architecture
                    network; optical prescription retrieval code; parallel;
                    parallel computing application; parallel machines;
                    parallel virtual; performance comparisons; programming;
                    PVM software system; virtual machines",
  treatment       = "X Experimental",
}
@InProceedings{Muller:1996:CDI,
  author          = "A. M{\"u}ller and R. R{\"u}hl",
  title           = "Communication-buffers for data-parallel, irregular
                    computations",
  crossref        = "Szymanski:1996:LCR",
  pages           = "295--298",
  year            = "1996",
  bibdate         = "Sun Dec 22 10:19:23 MST 1996",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation     = "Centro Svizzero di Calcolo Sci., Eidgenossische Tech.
                    Hochschule, Manno, Switzerland",
  classification  = "C5440 (Multiprocessing systems); C6110P (Parallel
                    programming); C6120 (File organisation); C6140D (High
                    level languages); C6150C (Compilers, interpreters and
                    other processors)",
  keywords        = "Buffer organizations; Common user interface;
                    Communication buffers; Compiler generated parallel
                    programs; Critical code segments; Data parallel
                    irregular computations; Data-parallel irregular
                    computations; Distributed data; Distributed memory
                    parallel processors; Distributed programs; High
                    Performance Fortran compiler; HPF extensions; Low level
                    machine interface; Message Passing Interface;
                    Parallelization Support Tool; Performance monitor;
                    Portable integrated tool environment Annai; PST HPF
                    extensions; Run time preprocessing; Source level
                    debugger; Unstructured computations",
  thesaurus       = "Buffer storage; Distributed memory systems; FORTRAN;
                    Parallel languages; Parallel programming; Parallelising
                    compilers",
}
@Article{Nagel:1996:VVA,
  author          = "W. E. Nagel and A. Arnold and M. Weber and H. C. Hoppe
                    and K. Solchenbach",
  title           = "{VAMPIR}: Visualization and Analysis of {MPI}
                    Resources",
  journal         = j-SUPERCOMPUTER,
  volume          = "12",
  number          = "1",
  pages           = "69--80",
  month           = jan,
  year            = "1996",
  CODEN           = "SPCOEL",
  ISSN            = "0168-7875",
  bibdate         = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation     = "Central Inst. for Appl. Math., Res. Centre Julich,
                    Germany",
  classification  = "C6110P (Parallel programming); C6110S (Software
                    metrics); C6150N (Distributed systems software); C6155
                    (Computer communications software)",
  corpsource      = "Central Inst. for Appl. Math., Res. Centre Julich,
                    Germany",
  fjournal        = "Supercomputer",
  keywords        = "activity chart; Activity chart; analysis; Analysis;
                    animation mode; Animation mode; computer interfaces;
                    flexible filter operation; Flexible filter operation;
                    information display reduction; Information display
                    reduction; message passing; message passing interface;
                    Message passing interface; message passing standard;
                    Message passing standard; MPI; MPI resource; parallel
                    programming; Parallel programming; PARvis; performance
                    bottleneck location; Performance bottleneck location;
                    software performance analysis; Software performance
                    analysis; software performance evaluation; state
                    diagram; State diagram; statistics; Statistics;
                    time-line displays; Time-line displays; tracing;
                    Tracing; VAMPIR; visualization; Visualization;
                    zooming; Zooming",
  pubcountry      = "Netherlands",
  thesaurus       = "Computer interfaces; Message passing; Parallel
                    programming; Software performance evaluation",
  treatment       = "P Practical",
}
@InProceedings{NicCanna:1996:LGS,
  author          = "C. {Nic Canna} and C. J. Bean",
  title           = "Larger grids and shorter wall-clock times on a
                    parallel virtual machine ({PVM}) --- an example using a
                    finite difference wave simulation algorithm",
  crossref        = "Abrahart:1996:GIC",
  volume          = "2",
  pages           = "2--??",
  year            = "1996",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C4170 (Differential equations); C6150N (Distributed
                    systems software); C6185 (Simulation techniques); C7340
                    (Geophysics computing); C7430 (Computer engineering)",
  corpsource      = "Dept. of Geol., Univ. Coll. Dublin, Ireland",
  keywords        = "acoustic wave equation; acoustic waves; algorithm;
                    array sizes; computer modelling; digital simulation;
                    Earth; finite difference; finite difference solution;
                    finite difference wave simulation; geologically
                    realistic; geophysics computing; machines; message
                    passing; methods; parallel; parallel virtual machine;
                    PVM message passing library; sciences; seismic wave;
                    seismology; simulation method; virtual machines;
                    virtually parallel machine; wall clock times; wave
                    equations",
  pubcountry      = "UK",
  treatment       = "P Practical",
}
@InProceedings{Oberhuber:1996:MNP,
  author          = "M. Oberhuber",
  title           = "Managing nondeterminism in {PVM} programs",
  crossref        = "Bode:1996:PVM",
  pages           = "347--??",
  year            = "1996",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C4240P (Parallel programming and algorithm theory);
                    C6110P (Parallel programming); C6115 (Programming
                    support); C6150G (Diagnostic, testing, debugging and
                    evaluating systems)",
  corpsource      = "Inst. fur Inf., Tech. Univ. Munchen, Germany",
  keywords        = "interprocess communication; nondeterminism; parallel
                    machines; parallel programming; parallel programs;
                    program debugging; program testing; PVM programs;
                    TOOLSET environment; virtual machines",
  pubcountry      = "Germany",
  treatment       = "T Theoretical or Mathematical",
}
@InProceedings{Ogawa:1996:OOM,
  author          = "Hirotaka Ogawa and Satoshi Matsuoka",
  title           = "{OMPI}: Optimizing {MPI} Programs Using Partial
                    Evaluation",
  crossref        = "ACM:1996:SCP",
  pages           = "??--??",
  year            = "1996",
  bibdate         = "Mon Mar 23 12:31:18 1998",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.supercomp.org/sc96/proceedings/SC96PROC/OGAWA/INDEX.HTM",
  acknowledgement = ack-nhfb,
}
@InProceedings{Papakostas:1996:PPP,
  author          = "N. Papakostas and G. Papakonstantinou and P.
                    Tsanakas",
  title           = "{PPARDB\slash PVM}: a portable {PVM} based parallel
                    database management system",
  crossref        = "Boszormenyi:1996:PCT",
  pages           = "219--??",
  year            = "1996",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C6110B (Software engineering techniques); C6160B
                    (Distributed databases); C6110P (Parallel programming);
                    C6160D (Relational databases); C7430 (Computer
                    engineering)",
  corpsource      = "Dept. of Electr. and Comput. Eng., Nat. Tech. Univ. of
                    Athens, Greece",
  keywords        = "architecture; computational model; crowd; database
                    processing elements; databases; distributed databases;
                    heterogeneous workstation; horizontal; layered;
                    multicasting; network; one master/multiple slaves;
                    operating system dependencies; operator parallelism;
                    parallel database management system; parallel
                    programming; partitioning; portable; portable PVM based
                    parallel database management; PPARDB/PVM; process
                    synchronisation; relation tuples; relational;
                    relational parallel database management system;
                    relationship; scientific programming; shared nothing;
                    software portability; system; tasks; transputer
                    network; virtual machines; virtual parallel computer;
                    workstations",
  pubcountry      = "Germany",
  treatment       = "P Practical",
}
@Article{Papakostas:1996:PSP,
  author          = "N. Papakostas and G. Papakonstantinou and P.
                    Tsanakas",
  title           = "{PPARDB\slash PVM}: a Portable {PVM} Based Parallel
                    Database Management System",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "1127",
  pages           = "219--??",
  month           = "????",
  year            = "1996",
  CODEN           = "LNCSD9",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "Lecture Notes in Computer Science",
}
@InProceedings{Papakostas:1996:UPI,
  author          = "N. Papakostas and G. Papakonstantinou and P.
                    Tsanakas",
  title           = "Using {PVM} to implement {PPARDB\slash PVM}, a
                    portable parallel database management system",
  crossref        = "Bode:1996:PVM",
  pages           = "108--??",
  year            = "1996",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C6110B (Software engineering techniques); C6110P
                    (Parallel programming); C7430 (Computer engineering);
                    C6160B (Distributed databases)",
  corpsource      = "Dept. of Electr. Eng., Nat. Tech. Univ. of Athens,
                    Greece",
  keywords        = "distributed databases; heterogeneous workstation
                    network; operator parallelism; parallel programming;
                    parallel systems; portability; portable communication
                    primitives; portable parallel database management;
                    PPARDB/PVM; PVM; separate database node; shared nothing
                    architecture; software; system; virtual machines;
                    workstations",
  pubcountry      = "Germany",
  treatment       = "P Practical",
}
@Article{Pernice:1996:RPP,
  author          = "Michael Pernice",
  title           = "Review of ``{PVM: Parallel Virtual Machine. A User's
                    Guide and Tutorial for Networked Parallel
                    Computing}''",
  journal         = j-IEEE-PAR-DIST-TECH,
  volume          = "4",
  number          = "1",
  pages           = "84--84",
  month           = "Spring",
  year            = "1996",
  CODEN           = "IPDTEX",
  DOI             = "https://doi.org/10.1109/M-PDT.1996.481711",
  ISSN            = "1063-6552 (print), 1558-1861 (electronic)",
  ISSN-L          = "1063-6552",
  bibdate         = "Tue Jan 23 16:38:43 1996",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://dlib.computer.org/pd/books/pd1996/pdf/p1084.pdf",
  acknowledgement = ack-nhfb,
  fjournal        = "IEEE parallel and distributed technology: systems and
                    applications",
}
@Article{Pokorny:1996:CMP,
  author          = "S. Pokorny",
  title           = "A Comparison of Message-Passing Parallelization to
                    Shared-Memory Parallelization",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "1156",
  pages           = "22--??",
  month           = "????",
  year            = "1996",
  CODEN           = "LNCSD9",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Pruyne:1996:ICP,
  author          = "Jim Pruyne and Miron Livny",
  title           = "Interfacing {Condor} and {PVM} to harness the cycles
                    of workstation clusters",
  journal         = j-FUT-GEN-COMP-SYS,
  volume          = "12",
  number          = "1",
  pages           = "67--85",
  month           = may,
  year            = "1996",
  CODEN           = "FGSEVI",
  ISSN            = "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L          = "0167-739X",
  bibdate         = "Fri Jul 15 09:06:07 MDT 2005",
  bibsource       = "ftp://ftp.ira.uka.de/bibliography/Parallel/pvm.bib;
                    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                    http://www.sciencedirect.com/science/journal/0167739X",
  acknowledgement = ack-nhfb,
  classification  = "C5620 (Computer networks and techniques); C6110P
                    (Parallel programming); C6150J (Operating systems);
                    C6150N (Distributed systems software)",
  corpsource      = "Dept. of Comput. Sci., Wisconsin Univ., Madison, WI,
                    USA",
  fjournal        = "Future Generation Computer Systems",
  journal-URL     = "http://www.sciencedirect.com/science/journal/0167739X",
  keywords        = "allocation; Condor; network operating systems;
                    parallel; parallel programming; processing; PVM;
                    resource; resource management; resource management
                    system; workstation clusters",
  pubcountry      = "Netherlands",
  remark          = "Resource Management in Distributed Systems",
  treatment       = "P Practical",
}
@Article{Qaddouri:1996:CPC,
  author          = "A. Qaddouri and R. Roy and M. Mayrand and B. Goulard",
  title           = "Collision Probability Calculation and Multigroup Flux
                    Solvers Using {PVM}",
  journal         = j-NUCL-SCI-ENG,
  volume          = "123",
  number          = "3",
  pages           = "392--402",
  month           = jul,
  year            = "1996",
  CODEN           = "NSENAO",
  ISSN            = "0029-5639",
  ISSN-L          = "0029-5639",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "A0260 (Numerical approximation and analysis); A2820H
                    (Neutron diffusion); A2841C (Computer codes for fission
                    reactor theory and design)",
  conflocation    = "Portland, OR, USA; 30 April-4 May 1995",
  conftitle       = "International Conference on Mathematics and
                    Computations, Reactor Physics, and Environmental
                    Analyses",
  corpsource      = "Inst. de Genie Nucl., Ecole Polytech. de Montreal,
                    Que., Canada",
  fjournal        = "Nuclear Science and Engineering",
  keywords        = "collision probability; cyclic; IBM SP2; iterative
                    methods; iterative process; linearized; multigroup flux
                    solvers; multigroup transport equation; neutron flux;
                    neutron transport theory; nuclear engineering
                    computing; PVM library; run times; SPARC 1000;
                    time-independent transport equation; tracking; two-step
                    energy/space",
  sponsororg      = "ANS; Eur. Nucl. Soc.; Atomic Energy Soc. Japan",
  treatment       = "T Theoretical or Mathematical",
}
@InProceedings{Ragg:1996:PEN,
  author          = "T. Ragg",
  title           = "Parallelization of an evolutionary neural network
                    optimizer based on {PVM}",
  crossref        = "Bode:1996:PVM",
  pages           = "351--??",
  year            = "1996",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C1180 (Optimisation techniques); C4240P (Parallel
                    programming and algorithm theory); C5290 (Neural
                    computing techniques); C6150J (Operating systems);
                    C6150N (Distributed systems software)",
  corpsource      = "Institut fur Logik, Karlsruhe Univ., Germany",
  keywords        = "allocation; batch processing (computers); batch
                    program; dynamic load balancing; ENZO; evolutionary
                    neural network optimizer; genetic algorithms; machine
                    load; nets; neural; parallel algorithms;
                    parallelization; pattern recognition; PVM; resource;
                    workstation-cluster",
  pubcountry      = "Germany",
  treatment       = "P Practical; T Theoretical or Mathematical",
}
@InProceedings{Reimann:1996:CBT,
  author          = "D. A. Reimann and V. Chaudhary and M. J. Flynn and I.
                    K. Sethi",
  title           = "Cone beam tomography using {MPI} on heterogeneous
                    workstation clusters",
  crossref        = "IEEE:1996:PSM",
  pages           = "142--148",
  year            = "1996",
  bibdate         = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C5260B (Computer vision and image processing
                    techniques); C5440 (Multiprocessing systems); C5620L
                    (Local area networks); C6150N (Distributed systems
                    software); C7410H (Computerised instrumentation)",
  conftitle       = "Proceedings. Second MPI Developer's Conference",
  keywords        = "application program interfaces; asynchronous
                    communication; asynchronous MPI; backprojection;
                    computerised tomography; cone beam tomography;
                    heterogeneous workstation clusters; image
                    reconstruction; load balancing; local area networks;
                    memory requirements; message passing; Message Passing
                    Interface; MPI; parallel methods; processing time;
                    processor utilization; projection views; resource
                    allocation; software libraries",
  sponsororg      = "IEEE Comput. Soc. Tech. Committee on Distributed
                    Process",
  treatment       = "P Practical; T Theoretical or Mathematical",
}
@InProceedings{Robinson:1996:TMI,
  author          = "J. Robinson and S. H. Russ and B. Flachs and B.
                    Heckel",
  title           = "A task migration implementation of the
                    {Message-Passing Interface}",
  crossref        = "IEEE:1996:PFI",
  pages           = "61--68",
  year            = "1996",
  bibdate         = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C6110P (Parallel programming); C6150E (General
                    utility programs); C6150N (Distributed systems
                    software)",
  conftitle       = "Proceedings of 5th IEEE International Symposium on
                    High Performance Distributed Computing",
  corpsource      = "NSF Eng. Res. Center for Comput. Field Simulation,
                    Mississippi State Univ., MS, USA",
  keywords        = "application program interfaces; Hector; heterogeneous
                    computing task allocator; heterogeneous platforms;
                    message passing; Message-Passing Interface; MPI
                    specification; parallel processing applications;
                    parallel program performance improvement; parallel
                    programming; software performance evaluation; task
                    migration implementation; workstation networks",
  sponsororg      = "IEEE Comput. Soc. Tech. Committee on Distributed
                    Process.; Northeast Parallel Architectures Center; New
                    York State Center for Adv. Technol. Comput.
                    Applications and Software Eng. (CASE Center) at
                    Syracuse Univ.; Rome Lab",
  treatment       = "P Practical",
}
@InProceedings{Roda:1996:PEI,
  author          = "J. Roda and J. Herrera and J. Gonzalez and C.
                    Rodriguez and F. Almeida and D. Gonzalez",
  title           = "Practical experiments to improve {PVM} algorithms",
  crossref        = "Bode:1996:PVM",
  pages           = "30--??",
  year            = "1996",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C6150N (Distributed systems software); C6155
                    (Computer communications software); C7430 (Computer
                    engineering)",
  corpsource      = "Univ. de La Laguna, Spain",
  keywords        = "broadcasting strategies; computer communications
                    software; intensive communication experiments; LAN;
                    local area networks; measurement; network parameter;
                    parallel processing; parallel virtual machine; PVM
                    algorithms; pvm.mcast; pvm.send; virtual machines",
  pubcountry      = "Germany",
  treatment       = "X Experimental",
}
@InProceedings{Russ:1996:HAT,
  author          = "S. H. Russ and B. Flachs and J. Robinson and B.
                    Heckel",
  title           = "Hector: automated task allocation for {MPI}",
  crossref        = "IEEE:1996:PII",
  pages           = "344--348",
  year            = "1996",
  bibdate         = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C5440 (Multiprocessing systems); C5620L (Local area
                    networks); C6110P (Parallel programming); C6150N
                    (Distributed systems software)",
  conftitle       = "Proceedings of International Conference on Parallel
                    Processing",
  corpsource      = "Mississippi State Univ., MS, USA",
  keywords        = "automated task allocation; automatic run-time
                    performance optimization; C; computational resources;
                    FORTRAN; Hector; local area networks; message passing;
                    message-passing; MPI; multiprocessing systems;
                    parallel processing; parallel programming; resource
                    allocation; software performance evaluation; task
                    migration; workstation networks",
  sponsororg      = "IEEE Comput. Tech. Committee on Parallel Process.; ACM
                    SIGARCH",
  treatment       = "P Practical",
}
@InProceedings{Santana:1996:PVM,
  author          = "M. S. Santana and P. S. Souza and R. C. Santana and S.
                    S. Souza",
  title           = "{Parallel Virtual Machine} for {Windows95}",
  crossref        = "Bode:1996:PVM",
  pages           = "288--??",
  year            = "1996",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C5620L (Local area networks); C6110P (Parallel
                    programming); C6150J (Operating systems); C6150N
                    (Distributed systems software)",
  corpsource      = "Inst. de Ciencias Math., Sao Carlos, Brazil",
  keywords        = "(computers); communication; local area networks;
                    message; message passing; network; operating systems;
                    parallel applications; parallel programming; Parallel
                    Virtual Machine for Windows 95; passing environment;
                    personal computers; PVM-W95; speedups; Windows 95
                    operating system; workstations",
  pubcountry      = "Germany",
  treatment       = "P Practical",
  xxauthor        = "M. S. Santana and R. C. Santana and P. S. Souza and S.
                    S. Souza",
}
@Article{Schuele:1996:PLA,
  author          = "J. Sch{\"u}le",
  title           = "Parallel {Lanczos} Algorithm on a {CRAY-T3D} Combining
                    {PVM} and {SHMEM} Routines",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "1156",
  pages           = "158--165",
  month           = "????",
  year            = "1996",
  CODEN           = "LNCSD9",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "Lecture Notes in Computer Science",
}
@InProceedings{Schule:1996:PLA,
  author          = "J. Sch{\"u}le",
  title           = "Parallel {Lanczos} algorithm on a {CRAY-T3D} combining
                    {PVM} and {SHMEM} routines",
  crossref        = "Bode:1996:PVM",
  pages           = "158--165",
  year            = "1996",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "A7130 (Metal-insulator transitions); A7155J
                    (Localization in disordered structures); C4140 (Linear
                    algebra); C6110P (Parallel programming); C7320 (Physics
                    and chemistry computing)",
  corpsource      = "Inst. for Sci. Comput., Braunschweig, Germany",
  keywords        = "Anderson model; Anderson-; arithmetic; coarse-grain;
                    Cray computers; CRAY-; cubic tungsten bronzes; data
                    distribution; dynamic group; effectiveness;
                    eigenfunctions; eigenvalues and; eigenvector
                    calculation; fine-grain; finite precision; Fock
                    approach; Hamiltonian matrix; HF calculations; Hubbard
                    model; iterations; iterative; Krylov sequence; load
                    imbalance; machines; master-slave paradigm; matrix
                    algebra; membership; metal-insulator transition;
                    methods; Mott-Hubbard model; NaWO/sub 3/; parallel
                    algorithms; parallel Lanczos algorithm; parallelisation
                    strategies; parallelism; physics computing; PVM;
                    resource allocation; rounding errors; roundoff errors;
                    SHMEM routines; sodium compounds; software development
                    costs; stochastic distributions; subroutines; T3D;
                    tridiagonalisation; unrestricted Hartree-; virtual",
  pubcountry      = "Germany",
  treatment       = "P Practical",
}
@InProceedings{Sener:1996:DPP,
  author          = "C. Sener and Y. Paker and A. Kiper",
  title           = "Data-parallel programming on {Helios}, parallel
                    environment and {PVM}",
  crossref        = "Yetongnon:1996:PII",
  volume          = "1",
  pages           = "2--??",
  year            = "1996",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C6110P (Parallel programming); C6115 (Programming
                    support); C6120 (File organisation); C6150N
                    (Distributed systems software)",
  corpsource      = "Dept. of Comput. Eng., Middle East Tech. Univ.,
                    Ankara, Turkey",
  keywords        = "C language interface; column-sums; computational
                    complexity; data; data flow computing; data-parallel;
                    data-parallel programming; environments; evaluation;
                    Helios operating system; IBM SP/2 system; image
                    processing; matrix; message passing; network; operating
                    systems; parallel environment; parallel programming;
                    Parallel Virtual Machine; performance; portability;
                    programming; programming tool; PVM; resource
                    allocation; software performance; software portability;
                    software tools; speed-up curves; structures;
                    transputers; type; virtual machines",
  sponsororg      = "ISCA; IEEE Comput. Soc.; IEEE Tech. Committee on
                    Operating Syst.; et al",
  treatment       = "P Practical",
}
@Article{Shyu:1996:ILQ,
  author          = "Shyong Jian Shyu and H. K.-C. Chang and K.-C. Chou",
  title           = "Implementation of a linear quadtree coding scheme on
                    the parallel virtual machine",
  journal         = j-INT-J-HIGH-SPEED-COMPUTING,
  volume          = "8",
  number          = "1",
  pages           = "65--79",
  month           = mar,
  year            = "1996",
  CODEN           = "IHSCEZ",
  ISSN            = "0129-0533",
  bibdate         = "Wed Apr 16 06:39:19 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "B6140C (Optical information, image and video signal
                    processing); C1250 (Pattern recognition); C5260B
                    (Computer vision and image processing techniques);
                    C5440 (Multiprocessing systems); C6120 (File
                    organisation); C7430 (Computer engineering)",
  corpsource      = "Dept. of Inf. Manage., Ming Chuan Univ., Taipei,
                    Taiwan",
  fjournal        = "International Journal of High Speed Computing",
  keywords        = "data; encoding; high; image encoding problem; image
                    manipulations; image processing; image processing
                    problems; linear quadtree coding; master-slave
                    paradigm; parallel machines; parallel virtual machine;
                    performance computing; quadtrees; structure; virtual
                    machines",
  pubcountry      = "Singapore",
  treatment       = "A Application; P Practical",
}
@InProceedings{Silva:1996:IDS,
  author          = "L. M. Silva and J. G. Silva and S. Chapple",
  title           = "Implementing distributed shared memory on top of
                    {MPI}: the {DSMPI} library",
  crossref        = "IEEE:1996:PFE",
  pages           = "50--57",
  year            = "1996",
  bibdate         = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C5440 (Multiprocessing systems); C6110P (Parallel
                    programming); C6115 (Programming support); C6150N
                    (Distributed systems software)",
  conftitle       = "Proceedings of 4th Euromicro Workshop on Parallel and
                    Distributed Processing",
  corpsource      = "Coimbra Univ., Portugal",
  keywords        = "coherence protocols; consistency; Cray T3D;
                    distributed memory machines; distributed memory
                    systems; distributed shared memory; DSMPI library;
                    message passing; MPI; parallel library; parallel
                    programming; performance; programming interface;
                    scalability; shared memory systems; software libraries;
                    software performance evaluation; software portability;
                    workstation network",
  treatment       = "P Practical",
}
@InProceedings{Sitsky:1996:IMU,
  author          = "D. Sitsky and P. Mackerras and A. Tridgell and D.
                    Walsh",
  title           = "Implementing {MPI} under {AP\slash Linux}",
  crossref        = "IEEE:1996:PSM",
  pages           = "32--39",
  year            = "1996",
  bibdate         = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract        = "A preliminary MPI library has been implemented for the
                    Fujitsu AP1000+ multicomputer running the AP/Linux
                    operating system. Under this environment, parallel
                    programs may be dedicated to a fixed partition, or a
                    number of parallel programs may share a partition.
                    Therefore, the MPI library has been constructed so that
                    messaging operations can be driven by polling and/or
                    interrupt techniques. It has been found that polling
                    works well when a single parallel program is running on
                    a given partition, and that interrupt-driven
                    communication makes far better use of the machine when
                    multiple parallel programs are executing. Gang
                    scheduling of multiple parallel programs which use
                    polling was found to be relatively ineffective.",
  acknowledgement = ack-nhfb,
  classification  = "C5220P (Parallel architecture); C5440
                    (Multiprocessing systems); C6110P (Parallel
                    programming); C6140D (High level languages); C6150E
                    (General utility programs); C6150G (Diagnostic,
                    testing, debugging and evaluating systems); C6150J
                    (Operating systems); C6115 (Programming support); C6150N
                    (Distributed systems software)",
  conflocation    = "Notre Dame, IN, USA; 1-2 July 1996",
  conftitle       = "Proceedings. Second MPI Developer's Conference",
  corpsource      = "CAP Res. Program, Australian Nat. Univ., Canberra,
                    ACT, Australia",
  keywords        = "AP/Linux; AP/Linux operating system; application
                    program interfaces; communication; debugging;
                    distributed memory systems; extensions; Fujitsu AP1000+
                    multicomputer; gang scheduling; interrupt techniques;
                    interrupt-driven; interrupt-driven communication;
                    interrupts; language issues; libraries; message
                    passing; Message Passing Interface standard; messaging
                    operations; MPI applications; MPI implementations; MPI
                    library; multiple parallel programs; operating system;
                    operating systems (computers); parallel languages;
                    parallel programming; parallel programs; partition;
                    performance evaluation; performance portable parallel
                    programming; polling; processor scheduling; program
                    debugging; software; software libraries; software
                    performance evaluation; software standards; utility
                    programs",
  sponsororg      = "IEEE Comput. Soc. Tech. Committee on Distributed
                    Process",
  treatment       = "P Practical",
}
@InProceedings{Sitsky:1996:MLW,
  author          = "D. Sitsky and E. Hayashi",
  title           = "An {MPI} library which uses polling, interrupts and
                    remote copying for the {Fujitsu AP1000+}",
  crossref        = "Li:1996:PSI",
  pages           = "43--49",
  year            = "1996",
  bibdate         = "Sat Apr 19 16:34:54 MDT 1997",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification  = "C5220P (Parallel architecture); C5470 (Performance
                    evaluation and testing); C6110B (Software engineering
                    techniques); C6115 (Programming support); C6150N
                    (Distributed systems software)",
  conftitle       = "Proceedings Second International Symposium on Parallel
                    Architectures, Algorithms, and Networks (I-SPAN'96)",
  corpsource      = "Dept. of Comput. Sci., Australian Nat. Univ.,
                    Canberra, ACT, Australia",
  keywords        = "Fujitsu AP1000+; interrupts; message passing; MPI
                    library; parallel architectures; performance;
                    performance evaluation; polling; remote copying;
                    software libraries; software portability",
  sponsororg      = "Chinese Nat. Res. Center for Intelligent Comput.
                    Syst.; IEEE Comput. Soc.; IEEE Comput. Soc. Tech.
                    Committee on Parallel Process.; Steering Committee of
                    the Chinese Nat. Hi-Tech Programme; Inf. Process. Soc.
                    Japan; Chinese Comput. Federation; IEICE Inf. and Syst.
                    Soc",
  treatment       = "P Practical",
}
%%% Sivaraman and Raghavendra: the ADDT data-distribution tool for PVM.
%%% Proceedings-level data is inherited from El-Rewini:1996:PTN.
@InProceedings{Sivaraman:1996:AAD,
  author =       "H. Sivaraman and C. S. Raghavendra",
  title =        "{ADDT}: Automatic Data Distribution Tool for Porting
                 Programs to {PVM}",
  crossref =     "El-Rewini:1996:PTN",
  volume =       "1",
  pages =        "557--564",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6110P (Parallel
                 programming); C6140D (High level languages); C6150C
                 (Compilers, interpreters and other processors); C6150N
                 (Distributed systems software)",
  corpsource =   "School of Electr. Eng. and Comput. Sci., Washington
                 State Univ., Pullman, WA, USA",
  keywords =     "ADDT; automatic data distribution tool; communication
                 latency; communication statements; data access; data
                 parallel languages; data partitioning; distributed
                 memory; distributed memory systems; distribution
                 blocks; environment; FORTRAN; Fortran; High
                 Performance; HPF; interpreters; languages; linear
                 optimization problem; nonlocal; optimisation; parallel;
                 parallel programming; program; program compilers;
                 program porting; PVM; shared memory parallel program;
                 shared memory systems; software portability",
  sponsororg =   "Univ. Hawaii; Univ. Hawaii College of Bus. Adm",
  treatment =    "P Practical",
}
%%% Skjellum, Protopopov, and Hebert: thread taxonomy for MPI (Second
%%% MPI Developer's Conference).  Parent entry: IEEE:1996:PSM.
@InProceedings{Skjellum:1996:TTM,
  author =       "A. Skjellum and B. Protopopov and S. Hebert",
  title =        "A thread taxonomy for {MPI}",
  crossref =     "IEEE:1996:PSM",
  pages =        "50--57",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110B (Software engineering techniques); C6110F
                 (Formal methods); C6150E (General utility programs);
                 C6150J (Operating systems); C6150N (Distributed systems
                 software)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Dept. of Comput. Sci., Mississippi State Univ., MS,
                 USA",
  keywords =     "API extensions; application program interfaces;
                 Channel Device; computational unit; fine-grain
                 concurrency; formal specification; message passing;
                 minimal portable thread management; MPI; MPICH;
                 multi-threaded thread-safe ADI; non-thread-safe MPI
                 call semantics; resource container; software
                 portability; synchronisation; synchronization
                 mechanisms; thread models; thread safety; thread
                 taxonomy; user-level mechanism; utility programs;
                 Windows NT version",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "P Practical",
}
%%% Smith and Christopoulos: PVM on workstation clusters for large TLM
%%% problems.  Parent entry: Silvester:1996:SEE.
@InProceedings{Smith:1996:UWC,
  author =       "N. P. G. Smith and C. Christopoulos",
  title =        "Utilising workstation clusters with {PVM} for the
                 solution of large {TLM} problems",
  crossref =     "Silvester:1996:SEE",
  pages =        "3--11",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "B1130B (Computer-aided circuit analysis and design);
                 B5240 (Transmission line theory); C6110P (Parallel
                 programming); C7410 (Electrical engineering
                 computing)",
  corpsource =   "Numerical Modelling Group, Nottingham Univ., UK",
  keywords =     "data; electrical engineering computing; load
                 balancing; parallel computing; parallel programming;
                 Parallel Virtual Machine; partitioning; PVM;
                 transmission line matrix methods; Transmission Line
                 Modelling; virtual machines; workstation clusters",
  pubcountry =   "UK",
  sponsororg =   "IEE; Univ. Florence",
  treatment =    "P Practical",
}
%%% Snir et al.: the MIT Press MPI reference book.  Publisher name and
%%% address come from the pub-MIT string abbreviations.
@Book{Snir:1996:MCR,
  author =       "Marc Snir and Steve W. Otto and Steven Huss-Lederman
                 and David W. Walker and Jack Dongarra",
  title =        "{MPI}: the complete reference",
  publisher =    pub-MIT,
  address =      pub-MIT:adr,
  pages =        "xii + 336",
  year =         "1996",
  ISBN =         "0-262-69184-1",
  ISBN-13 =      "978-0-262-69184-0",
  LCCN =         "QA76.642.M65 1996",
  bibdate =      "Fri Jan 31 07:16:14 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  price =        "US\$27.50",
  acknowledgement = ack-nhfb,
}
%%% Soch, Trdlicka, and Tvrdik: PVM in a parallel computing course.
%%% Parent entry: Bode:1996:PVM.  Ending page is unknown ("??").
@InProceedings{Soch:1996:PCG,
  author =       "M. Soch and J. Trdlicka and P. Tvrdik",
  title =        "{PVM}, computational geometry, and parallel computing
                 course",
  crossref =     "Bode:1996:PVM",
  pages =        "38--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C0220 (Computing education and training); C4240P
                 (Parallel programming and algorithm theory); C4260
                 (Computational geometry); C5440 (Multiprocessing
                 systems); C6110P (Parallel programming); C6130B
                 (Graphics techniques); C7310 (Mathematics computing);
                 C7810C (Computer-aided instruction)",
  corpsource =   "Dept. of Comput. Sci. and Eng., Czech Tech. Univ.,
                 Prague, Czech Republic",
  keywords =     "computational geometry; computer science education;
                 courseware; distributed memory machines; distributed
                 memory systems; educational courses; laboratories;
                 mathematics computing; non-trivial parallel algorithms;
                 parallel algorithms; parallel computing course;
                 parallel programming; programming tool; projects;
                 student PVM; students; SUN; teaching; term projects;
                 upper level undergraduate; workstation lab",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
%%% Squyres, Lumsdaine, and Stevenson: cluster-based parallel image
%%% processing toolkit.  Parent entry: Grinstein:1996:VDE.
@InProceedings{Squyres:1996:CBP,
  author =       "J. M. Squyres and A. Lumsdaine and R. L. Stevenson",
  title =        "A cluster-based parallel image processing toolkit",
  crossref =     "Grinstein:1996:VDE",
  volume =       "2421",
  pages =        "228--239",
  year =         "1996",
  CODEN =        "PSISDG",
  ISSN =         "0277-786X (print), 1996-756X (electronic)",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Lab. for Sci. Comput., Notre Dame Univ., IN, USA",
  classification = "B6140C (Optical information, image and video signal
                 processing); C5260B (Computer vision and image
                 processing techniques); C6110B (Software engineering
                 techniques); C6115 (Programming support); C6150N
                 (Distributed systems software)",
  keywords =     "Cluster-based computing; Data I/O; Data locality; Load
                 balancing; Message passing interface; MPI standard;
                 Network technologies; Parallel computing resource;
                 Parallel image processing software library; Parallel
                 image processing toolkit; Sequential image processing;
                 Specialized massively parallel computing hardware;
                 Visualization; Workstation clusters",
  thesaurus =    "Computer networks; Image processing; Message passing;
                 Parallel processing; Software libraries; Software
                 tools; Workstations",
}
%%% Stathopoulos, Ynnerman, and Froese Fischer: PVM implementation of
%%% the MCHF atomic structure package.  The issue is dated by season,
%%% so month holds a quoted string rather than a month macro.
@Article{Stathopoulos:1996:PIM,
  author =       "Andreas Stathopoulos and Anders B. Ynnerman and
                 Charlotte {Froese Fischer}",
  title =        "A {PVM} Implementation of the {MCHF} Atomic Structure
                 Package",
  journal =      j-IJSAHPC,
  volume =       "10",
  number =       "1",
  pages =        "41--61",
  month =        "Spring",
  year =         "1996",
  CODEN =        "IJSCFG",
  ISSN =         "1078-3482",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6150N (Distributed
                 systems software); C7310 (Mathematics computing); C7320
                 (Physics and chemistry computing); C7400 (Engineering
                 computing)",
  corpsource =   "Dept. of Comput. Sci., Vanderbilt Univ., Nashville,
                 TN, USA",
  fjournal =     "International Journal of Supercomputer Applications
                 and High Performance Computing",
  keywords =     "algorithmic; atomic data; atomic structure;
                 calculations; computational demands; computing; CPU;
                 disk space; engineering; engineering computing;
                 evaluation; high-end workstation cluster; IBM SP2
                 multiprocessor; improvements; mathematics; MCHF atomic;
                 Multiconfiguration Hartree--Fock package; parallel
                 computers; parallel machines; parallel programming;
                 Parallel Virtual Machine; physics; prime memory;
                 problem size; PVM implementation; PVM programming;
                 science; software packages; software performance;
                 speed; structure package; user interfaces;
                 user-friendly interface",
  treatment =    "A Application; P Practical",
}
%%% Steed and Clement: performance prediction of PVM programs.
%%% Parent entry: IEEE:1996:PII.
@InProceedings{Steed:1996:PPP,
  author =       "M. R. Steed and M. J. Clement",
  title =        "Performance prediction of {PVM} programs",
  crossref =     "IEEE:1996:PII",
  pages =        "803--807",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5620L (Local area networks); C6110P (Parallel
                 programming); C6150G (Diagnostic, testing, debugging
                 and evaluating systems); C6150N (Distributed systems
                 software)",
  corpsource =   "Dept. of Comput. Sci., Brigham Young Univ., Provo, UT,
                 USA",
  keywords =     "analysis; APACHE; Automated; clusters; debugging;
                 evaluation; local area networks; message passing
                 library; Parallel; parallel; parallel computing;
                 parallel programming; performance; performance tools;
                 program; program debugging; programming; PVM
                 Application Characterization Environment; PVM programs;
                 scalable parallel applications; software libraries;
                 software performance; software performance prediction;
                 software reusability; software tools; Virtual Machine;
                 workstation",
  sponsororg =   "IEEE Comput. Tech. Committee on Parallel Process.; ACM
                 SIGARCH",
  treatment =    "P Practical",
}
%%% Stellner: CoCheck checkpointing and process migration for MPI.
%%% Parent entry: IEEE:1996:PII.
@InProceedings{Stellner:1996:CCP,
  author =       "G. Stellner",
  title =        "{CoCheck}: checkpointing and process migration for
                 {MPI}",
  crossref =     "IEEE:1996:PII",
  pages =        "526--531",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C5620L (Local area
                 networks); C6150N (Distributed systems software)",
  conftitle =    "Proceedings of International Conference on Parallel
                 Processing",
  corpsource =   "Inst. fur Inf., Tech. Univ. Munchen, Germany",
  keywords =     "checkpointing; CoCheck; consistency; LAN; local area
                 networks; message passing; message passing library;
                 MPI; parallel applications; parallel machines; process
                 migration; resource allocation; single process
                 checkpointer; software libraries; workstation
                 networks",
  sponsororg =   "IEEE Comput. Tech. Committee on Parallel Process.; ACM
                 SIGARCH",
  treatment =    "P Practical",
}
%%% Stone and Underwood: rendering numerical flow simulations with MPI
%%% (Second MPI Developer's Conference).  Parent entry: IEEE:1996:PSM.
@InProceedings{Stone:1996:RNF,
  author =       "J. Stone and M. Underwood",
  title =        "Rendering of numerical flow simulations using {MPI}",
  crossref =     "IEEE:1996:PSM",
  pages =        "138--141",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C6130B (Graphics
                 techniques); C6150N (Distributed systems software);
                 C6185 (Simulation techniques); C7320 (Physics and
                 chemistry computing)",
  conftitle =    "Proceedings. Second MPI Developer's Conference",
  corpsource =   "Dept. of Comput. Sci., Missouri Univ., Rolla, MO,
                 USA",
  keywords =     "application program interfaces; computational fluid
                 dynamics code; data visualisation; dedicated graphics
                 workstations; digital simulation; disks; fluid
                 dynamics; IBM SP2; in-place rendering; Intel iPSC/860;
                 Intel Paragon; message passing; Message Passing
                 Interface; MPI; networked graphics workstations;
                 numerical flow simulations; parallel architectures;
                 physics computing; ray tracing; ray tracing library;
                 rendering (computer graphics); run-time visualization;
                 software libraries; workstation networks",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
  treatment =    "A Application; P Practical",
}
%%% Strietzel: MPI-based parallel turbulence simulation (HPCN Europe
%%% 1996).  Parent entry: Liddell:1996:HPC.
@InProceedings{Strietzel:1996:PTS,
  author =       "M. Strietzel",
  title =        "Parallel turbulence simulation based on {MPI}",
  crossref =     "Liddell:1996:HPC",
  pages =        "283--289",
  year =         "1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A0260 (Numerical approximation and analysis); A0270
                 (Computational techniques); A4710 (General fluid
                 dynamics theory, simulation and other computational
                 methods); A4725 (Turbulent flows, convection, and heat
                 transfer); C4240P (Parallel programming and algorithm
                 theory); C6110P (Parallel programming); C7320 (Physics
                 and chemistry computing)",
  conftitle =    "High-Performance Computing and Networking.
                 International Conference and Exhibition HPCN Europe
                 1996",
  corpsource =   "Zentrum fur Paralleles Rechnen, Koln Univ., Germany",
  keywords =     "direct numerical simulation; divide and conquer
                 method; divide and conquer methods; domain
                 decomposition; flow simulation; large-eddy simulation;
                 message passing; message passing platform; MPI;
                 numerical analysis; parallel algorithms; parallel
                 turbulence simulation; parallelization strategy;
                 physics computing; Poisson equation; three dimensional
                 incompressible Navier--Stokes equation; turbulence;
                 turbulent fluids",
  treatment =    "T Theoretical or Mathematical",
}
%%% Subramaniam, Kothari, and Heller: active-message communication
%%% library for PVM (J. Parallel Distrib. Comput. 39(2)).
@Article{Subramaniam:1996:CLU,
  author =       "Krishnan R. Subramaniam and Suraj C. Kothari and Don
                 Heller",
  title =        "A Communication Library Using Active Messages to
                 Improve Performance of {PVM}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "39",
  number =       "2",
  pages =        "146--152",
  day =          "15",
  month =        dec,
  year =         "1996",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1006/jpdc.1996.0162",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Thu Mar 9 09:19:01 MST 2000",
  bibsource =    "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.0162/production;
                 http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.0162/production/pdf",
  acknowledgement = ack-nhfb,
  classification = "C6110B (Software engineering techniques); C6150N
                 (Distributed systems software)",
  corpsource =   "Dept. of Comput. Sci., Iowa State Univ., Ames, IA,
                 USA",
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
  keywords =     "active messages; communication; communication library;
                 controlled; message passing; parallel processing;
                 polling; primitives; signal driven message
                 notification; software libraries; software performance
                 evaluation; virtual machines",
  treatment =    "P Practical",
}
%%% Sunderam: status, trends, and directions of the PVM system.
%%% Parent entry: Bode:1996:PVM.  Ending page is unknown ("??").
@InProceedings{Sunderam:1996:PSS,
  author =       "V. Sunderam",
  title =        "The {PVM} system: status, trends, and directions",
  crossref =     "Bode:1996:PVM",
  pages =        "68--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6150N (Distributed
                 systems software); C7430 (Computer engineering)",
  corpsource =   "Dept. of Math. and Comput. Sci., Emory Univ., Atlanta,
                 GA, USA",
  keywords =     "API; application program interfaces; environments;
                 future directions; heterogeneous; heterogeneous
                 concurrent computing; high performance computing;
                 network computing; parallel programming; Parallel
                 Virtual Machine; programming model; PVM system; robust
                 portable implementations; software framework; virtual
                 machines",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
%%% Suttner: SPTHEO, a PVM-based parallel theorem prover, recorded as
%%% an article in LNCS volume 1156.  The month is unknown ("????").
@Article{Suttner:1996:SPB,
  author =       "C. B. Suttner",
  title =        "{SPTHEO} --- a {PVM-based} parallel theorem prover",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1156",
  pages =        "116--125",
  month =        "????",
  year =         "1996",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C1160 (Combinatorial mathematics); C1180
                 (Optimisation techniques); C4210 (Formal logic); C6110P
                 (Parallel programming); C7430 (Computer engineering)",
  corpsource =   "Inst. fur Inf., Tech. Univ. Munchen, Germany",
  fjournal =     "Lecture Notes in Computer Science",
  keywords =     "communication aspects; environments; high latency;
                 message passing; parallel programming; parallel search;
                 parallelization; portable implementation; proof; PVM
                 based parallel theorem prover; PVM message passing
                 system; search problems; SETHEO; SPS model; SPTHEO;
                 system; theorem proving; virtual machines; workstation
                 networks",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
%%% Theodoropoulos et al.: extended synchronization mechanisms for PVM.
%%% Parent entry: Bode:1996:PVM.  Ending page is unknown ("??").
@InProceedings{Theodoropoulos:1996:ESP,
  author =       "P. Theodoropoulos and G. Manis and P. Tsanakas and G.
                 Papakonstantinou",
  title =        "Extending synchronization {PVM} mechanisms",
  crossref =     "Bode:1996:PVM",
  pages =        "315--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6150J (Operating
                 systems); C6150N (Distributed systems software); C7430
                 (Computer engineering)",
  corpsource =   "Dept. of Electr. Eng., Nat. Tech. Univ. of Athens,
                 Greece",
  keywords =     "barriers; global semaphores; message; message passing;
                 operating system; operating systems (computers); Orchid
                 platform; parallel; parallel virtual machine; passing;
                 primitives; programming; synchronisation;
                 synchronization; virtual machines",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
%%% Touhafi, Brissinck, and Dirkx: PVM code for a low-latency
%%% switch-based interconnect.  Parent entry: Bode:1996:PVM.
%%% The keyword phrase "small end-to-end latency" is kept as one term:
%%% splitting it at the hyphen leaves two meaningless fragments.
@InProceedings{Touhafi:1996:DPC,
  author =       "A. Touhafi and W. Brissinck and E. F. Dirkx",
  title =        "Development of {PVM} code for a low latency switch
                 based interconnect",
  crossref =     "Bode:1996:PVM",
  pages =        "229--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5440 (Multiprocessing systems); C6110P (Parallel
                 programming); C6150J (Operating systems); C6150N
                 (Distributed systems software); C7430 (Computer
                 engineering)",
  corpsource =   "V.U.B. TW-INFO, Brussels, Belgium",
  keywords =     "communication; device driver; device drivers;
                 distributed parallel computing; Ethernet; fast switch
                 based network; intensive applications; interchanged
                 messages; interconnection networks; low latency switch
                 based interconnect; machines; message; message passing;
                 message passing tool; multiprocessor; parallel
                 programming; passing; PVM code; PVM routines; small
                 end-to-end latency; virtual",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
%%% Twerda, Van den Berg, and Van der Steen: parallel time-dependent
%%% Rayleigh-Benard convection (Supercomputer 12(2)).
%%% The machine named in the keywords is the Cray T3D.
@Article{Twerda:1996:PIT,
  author =       "A. Twerda and A. P. {Van den Berg} and A. J. {Van der
                 Steen}",
  title =        "Parallel implementation of time dependent
                 {Rayleigh-Benard} convection",
  journal =      j-SUPERCOMPUTER,
  volume =       "12",
  number =       "2",
  pages =        "36--47",
  month =        mar,
  year =         "1996",
  CODEN =        "SPCOEL",
  ISSN =         "0168-7875",
  bibdate =      "Wed Mar 18 08:37:01 MST 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Dept. of Geophys., Utrecht Univ., Netherlands",
  classification = "A4720 (Hydrodynamic stability and instability);
                 C5440 (Multiprocessing systems); C7320 (Physics and
                 chemistry computing)",
  corpsource =   "Dept. of Geophys., Utrecht Univ., Netherlands",
  fjournal =     "Supercomputer",
  keywords =     "Cray T3D; distributed memory parallel systems; message
                 passing; message passing model; parallel
                 implementation; parallel models; parallel processing;
                 physics computing; PVM; Rayleigh-Benard instability;
                 scalability; time dependent Rayleigh-Benard
                 convection",
  pubcountry =   "Netherlands",
  treatment =    "A Application; P Practical",
}
%%% Uhl and Hammerle: parallel image compression on a workstation
%%% cluster with PVM.  Parent entry: Bode:1996:PVM.
@InProceedings{Uhl:1996:PIC,
  author =       "A. Uhl and J. Hammerle",
  title =        "Parallel image compression on a workstation cluster
                 using {PVM}",
  crossref =     "Bode:1996:PVM",
  pages =        "301--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "B6120B (Codes); B6140C (Optical information, image
                 and video signal processing); C1250 (Pattern
                 recognition); C4240P (Parallel programming and
                 algorithm theory); C5440 (Multiprocessing systems)",
  corpsource =   "Dept. of Comput. Sci. and Syst. Anal., Salzburg Univ.,
                 Austria",
  keywords =     "data compression; FDDI; fractal image compression;
                 image coding; interconnected workstation-cluster;
                 parallel algorithms; parallel image compression;
                 parallel machines; parallel meta-algorithm; PVM;
                 virtual machines; workstation cluster",
  pubcountry =   "Germany",
  treatment =    "A Application; P Practical",
}
%%% Wagner, Kueblbeck, and Schittko: genetic selection and generation
%%% of textural features with PVM.  Parent entry: Bode:1996:PVM.
@InProceedings{Wagner:1996:GSG,
  author =       "T. Wagner and C. Kueblbeck and C. Schittko",
  title =        "Genetic selection and generation of textural features
                 with {PVM}",
  crossref =     "Bode:1996:PVM",
  pages =        "305--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "B0260 (Optimisation techniques); B6140C (Optical
                 information, image and video signal processing); C1180
                 (Optimisation techniques); C4240P (Parallel programming
                 and algorithm theory); C5260B (Computer vision and
                 image processing techniques); C7330 (Biology and
                 medical computing); C7430 (Computer engineering)",
  corpsource =   "Fraunhofer-Institut fur Integrierte Schaltungen,
                 Erlangen, Germany",
  keywords =     "algorithms; cell identification; Gallops PVM package;
                 genetic algorithms; genetic selection; medical image
                 processing; medical imaging; parallel; PVM; quality
                 control; surface inspection; systems; textural features
                 generation; textural image features; textured images;
                 tumor; virtual machines",
  pubcountry =   "Germany",
  treatment =    "A Application; P Practical",
}
%%% Wagner and Haghighat: parallel MCNP Monte Carlo transport with MPI
%%% (Trans. Amer. Nucl. Soc. 75).  Issue number and month are unknown
%%% and carry "??" placeholders.
%%% NOTE(review): "Experience- DD and R" in conftitle also looks
%%% garbled, but the intended punctuation is not recoverable from this
%%% file, so it is left unchanged.
@Article{Wagner:1996:PMM,
  author =       "J. C. Wagner and A. Haghighat",
  title =        "Parallel {MCNP Monte Carlo} transport calculations
                 with {MPI}",
  journal =      j-TRANS-AM-NUCL-SOC,
  volume =       "75",
  number =       "??",
  pages =        "338--339",
  month =        "????",
  year =         "1996",
  CODEN =        "TANSAO",
  ISSN =         "0003-018X",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A0250 (Probability theory, stochastic processes, and
                 statistics); A0270 (Computational techniques); A2820H
                 (Neutron diffusion); A2841C (Computer codes for fission
                 reactor theory and design); C1140G (Monte Carlo
                 methods); C4240P (Parallel programming and algorithm
                 theory); C6110P (Parallel programming); C7470 (Nuclear
                 engineering computing)",
  conftitle =    "American Nuclear Society and the European Nuclear
                 Society 1996 International Conference on the Global
                 Benefits of Nuclear Technology and the Embedded Topical
                 Meetings. Low-Level Radiation Health Effects, DD and
                 R: Worldwide Experience- DD and R, What Does it Mean,
                 and International Nuclear Policy Issues (papers in
                 summary form only received)",
  corpsource =   "Pennsylvania State Univ., University Park, PA, USA",
  fjournal =     "Transactions of the American Nuclear Society",
  keywords =     "fission reactor kinetics; high-speed communication
                 switches; message passing; message-passing interface;
                 message-passing library package; message-passing
                 software package; Monte Carlo methods; MPI; neutron
                 transport theory; nuclear engineering computing;
                 parallel algorithms; parallel MCNP Monte Carlo
                 transport calculations; parallel Monte Carlo; parallel
                 virtual machine; workstation clusters",
  treatment =    "P Practical",
}
%%% Walker: MPI tutorial report; institution and address come from the
%%% inst-ORNL string abbreviations.
@TechReport{Walker:1996:MFA,
  author =       "David W. Walker",
  title =        "{MPI}: from Fundamentals to Applications",
  institution =  inst-ORNL,
  address =      inst-ORNL:adr,
  year =         "1996",
  bibdate =      "Tue Jan 16 08:29:47 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.epm.ornl.gov/~walker/mpi/SLIDES/mpi-tutorial.html",
}
%%% Walker: Web page collecting MPI2 proposals.
@Misc{Walker:1996:MP,
  author =       "David W. Walker",
  title =        "{MPI2} Proposals",
  howpublished = "World-Wide Web",
  year =         "1996",
  bibdate =      "Tue Jan 16 08:33:57 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.epm.ornl.gov/~walker/mpi/mpi2-proposals.html",
}
%%% Walker and Dongarra: overview of the MPI standard (Supercomputer
%%% 12(1)).  The keyword list holds both lowercase and capitalized
%%% variants of several terms.
@Article{Walker:1996:MSM,
  author =       "D. W. Walker and J. J. Dongarra",
  title =        "{MPI}: a standard Message Passing Interface",
  journal =      j-SUPERCOMPUTER,
  volume =       "12",
  number =       "1",
  pages =        "56--68",
  month =        jan,
  year =         "1996",
  CODEN =        "SPCOEL",
  ISSN =         "0168-7875",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  affiliation =  "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
  classification = "C5640 (Protocols); C6150N (Distributed systems
                 software); C6155 (Computer communications software)",
  corpsource =   "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
  fjournal =     "Supercomputer",
  keywords =     "collective communication routine; Collective
                 communication routine; computer interface; Computer
                 interface; computer interfaces; computer standard;
                 Computer standard; distributed processing; Distributed
                 processing; Europe; massively parallel computer;
                 Massively parallel computer; message passing; message
                 passing interface; Message passing interface; MPI;
                 network interface; Network interface; network
                 interfaces; parallel processing; Parallel processing;
                 point-to-point; Point-to-point; protocols; software
                 standards; standards; United States; workstation
                 network; Workstation network",
  pubcountry =   "Netherlands",
  thesaurus =    "Computer interfaces; Distributed processing; Message
                 passing; Network interfaces; Parallel processing;
                 Protocols; Software standards; Standards",
  treatment =    "P Practical",
}
%%% Walker and Otto: redistribution of block-cyclic data distributions
%%% with MPI (journal abbreviation j-CPE, volume 8, number 9).
@Article{Walker:1996:RBC,
  author =       "D. W. Walker and S. W. Otto",
  title =        "Redistribution of block-cyclic data distributions
                 using {MPI}",
  journal =      j-CPE,
  volume =       "8",
  number =       "9",
  pages =        "707--728",
  month =        nov,
  year =         "1996",
  CODEN =        "CPEXEI",
  ISSN =         "1040-3108",
  ISSN-L =       "1040-3108",
  bibdate =      "Tue Sep 7 06:06:27 MDT 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www3.interscience.wiley.com/journalfinder.html",
  URL =          "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=23305",
  acknowledgement = ack-nhfb,
  classification = "C4240P (Parallel programming and algorithm theory);
                 C6140D (High level languages); C6150N (Distributed
                 systems software)",
  corpsource =   "Math. Sci. Sect., Oak Ridge Nat. Lab., TN, USA",
  fjournal =     "Concurrency, practice and experience",
  keywords =     "block-cyclic data distributions; block-cyclic fashion;
                 broadcast; collective communication operations;
                 FORTRAN; High Performance Fortran; IBM SP-1; Intel
                 Paragon; load balancing properties; message passing;
                 message passing algorithms; MPI; MPI-ALLTOALL; parallel
                 algorithms; processor scheduling; redistribution
                 operation; resource allocation",
  treatment =    "T Theoretical or Mathematical",
}
%%% Wedemeijer et al.: parallelisation of seismic algorithms with PVM
%%% and FORGE.  Parent entry: Liddell:1996:HPC.
%%% The A9365 heading is written without an internal semicolon so that
%%% the ";"-separated classification list cannot be split inside it.
@InProceedings{Wedemeijer:1996:PSA,
  author =       "H. Wedemeijer and H. L. H. Cox and D. J. Verschuur and
                 I. L. Ritsema",
  title =        "Parallelisation of seismic algorithms using {PVM} and
                 {FORGE}",
  crossref =     "Liddell:1996:HPC",
  pages =        "352--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "A9130 (Seismology); A9130R (Controlled source
                 seismology); A9365 (Data and information acquisition,
                 processing, storage and dissemination in geophysics);
                 A9385 (Instrumentation and techniques for geophysical,
                 hydrospheric and lower atmosphere research); C5260B
                 (Computer vision and image processing techniques);
                 C6110P (Parallel programming); C7340 (Geophysics
                 computing)",
  corpsource =   "TNO Inst. of Appl. Geosci., Delft, Netherlands",
  keywords =     "algorithms; Earth subsurface; explosion seismology;
                 FORGE; geophysical prospecting; geophysical signal
                 processing; geophysical techniques; geophysics
                 computing; imaging; implementation; measurement
                 technique; method; optimisation; parallel; parallel
                 programming; parallelisation; prospecting; PVM; seismic
                 algorithm; seismic reflection profiling; seismology;
                 signal processing",
  pubcountry =   "Germany",
  treatment =    "P Practical; T Theoretical or Mathematical",
}
%%% Wilson et al.: SBS and MPI suppression in analogue optical links
%%% (Electronics Letters 32(16)).  Per the keywords, "MPI" here means
%%% multipath interference, not the Message Passing Interface.
@Article{Wilson:1996:SMS,
  author =       "G. C. Wilson and T. H. Wood and J. L. Zyskind and J.
                 W. Sulhoff and J. E. Johnson and T. Tanbun-Ek and P. A.
                 Morton",
  title =        "{SBS} and {MPI} suppression in analogue systems with
                 integrated electroabsorption modulator\slash {DFB}
                 laser transmitters",
  journal =      j-ELECT-LETTERS,
  volume =       "32",
  number =       "16",
  pages =        "1502--1504",
  month =        "????",
  year =         "1996",
  CODEN =        "ELLEAK",
  ISSN =         "0013-5194 (print), 1350-911X (electronic)",
  ISSN-L =       "0013-5194",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "B4150 (Electro-optical devices); B4360 (Laser
                 applications); B6260 (Optical links and equipment);
                 B6430D (CATV and wired systems)",
  corpsource =   "Lucent Technol., Holmdel, NJ, USA",
  fjournal =     "Electronics Letters",
  keywords =     "1.7 GHz; analogue systems; cable television; CATV
                 systems; chirp modulation; DFB laser transmitters;
                 distributed feedback lasers; dithering; electro-optical
                 modulation; electroabsorption; electroabsorption
                 modulator; excess noise; integrated modulator/laser
                 transmitters; interference suppression; laser beam
                 applications; laser bias current; low-chirp modulation;
                 MPI suppression; multipath interference; narrow
                 linewidth sources; optical fibre communication; optical
                 noise; optical transmitters; SBS suppression;
                 stimulated Brillouin scattering",
  treatment =    "X Experimental",
}
%%% Wismueller: state-based visualization of PVM applications, recorded
%%% as an article in LNCS volume 1156.
%%% NOTE(review): appears to describe the same paper as the entry
%%% Wismuller:1996:SBV; this entry carries no bibdate field.
@Article{Wismueller:1996:SBV,
  author =       "R. Wismueller",
  title =        "State Based Visualization of {PVM} Applications",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1156",
  pages =        "91--??",
  month =        "????",
  year =         "1996",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
%%% Wismueller and Ludwig: The Tool-Set, an integrated tool environment
%%% for PVM, recorded as an article in LNCS.
%%% LNCS is a book series: 1067 is the series volume, so it belongs in
%%% the volume field (as in the other LNCS entries of this file); there
%%% is no separate issue number.
%%% NOTE(review): appears to describe the same paper as the entry
%%% Wismuller:1996:TSI.
@Article{Wismueller:1996:TSI,
  author =       "R. Wismueller and T. Ludwig",
  title =        "The Tool-Set --- An Integrated Tool Environment for
                 {PVM}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1067",
  pages =        "1029--??",
  month =        "????",
  year =         "1996",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Aug 14 09:02:28 MDT 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
%%% Wismuller: state-based visualization of PVM applications.
%%% Parent entry: Bode:1996:PVM.
%%% The starting page is taken from the entry Wismueller:1996:SBV,
%%% which records the same paper at page 91 of LNCS 1156; the ending
%%% page is unknown ("??").  An empty pages field would only trigger
%%% an empty-field warning.
%%% NOTE(review): the umlaut in the surname is inferred from the
%%% "Wismueller" transliteration used by that sibling entry.
@InProceedings{Wismuller:1996:SBV,
  author =       "R. Wism{\"u}ller",
  title =        "State based visualization of {PVM} applications",
  crossref =     "Bode:1996:PVM",
  pages =        "91--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6110V (Visual
                 programming); C6130B (Graphics techniques); C6150G
                 (Diagnostic, testing, debugging and evaluating
                 systems); C7430 (Computer engineering)",
  corpsource =   "Inst. fur Inf., Tech. Univ. Munchen, Germany",
  keywords =     "animated sequence; behavior; consistent; critical
                 issue; debugging; dynamic; event ordering algorithm;
                 global; global clock; optimization; parallel
                 programming; parallel programs; program diagnostics;
                 program flow; PVM applications; state based
                 visualization; state based visualizer; states; virtual
                 machines; VISTOP; visual programming; visualization;
                 visualization tool",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
%%% Wismuller and Ludwig: The Tool Set, an integrated tool environment
%%% for PVM.  Parent entry: Liddell:1996:HPC.
%%% The starting page is taken from the entry Wismueller:1996:TSI,
%%% which records the same paper at page 1029 of LNCS 1067; the ending
%%% page is unknown ("??").  An empty pages field would only trigger
%%% an empty-field warning.
%%% NOTE(review): the umlaut in the surname is inferred from the
%%% "Wismueller" transliteration used by that sibling entry.
@InProceedings{Wismuller:1996:TSI,
  author =       "R. Wism{\"u}ller and T. Ludwig",
  title =        "The {Tool Set} --- an integrated tool environment for
                 {PVM}",
  crossref =     "Liddell:1996:HPC",
  pages =        "1029--??",
  year =         "1996",
  bibdate =      "Wed Apr 16 06:39:19 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C6110P (Parallel programming); C6115 (Programming
                 support)",
  corpsource =   "Lehrstuhl fur Rechnertechnik und Rechnerorganisation,
                 Tech. Univ. Munchen, Germany",
  keywords =     "checkpointing; debugging; deterministic execution;
                 development; integrated tool environment; load
                 balancing; parallel I/O; parallel program; parallel
                 programming; performance analysis; program flow
                 visualization; programming environments; PVM; software;
                 Tool Set; tools",
  pubcountry =   "Germany",
  treatment =    "P Practical",
}
@InProceedings{Wolf:1996:CFS,
  author          = {K. Wolf and E. Brakkee},
  title           = {Coupling fluids and structures codes on {MPI}},
  crossref        = {IEEE:1996:PSM},
  pages           = {130--137},
  year            = {1996},
  bibdate         = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C6110B (Software engineering techniques); C6115
    (Programming support); C6150N (Distributed systems
    software); C6185 (Simulation techniques)},
  conftitle       = {Proceedings. Second MPI Developer's Conference},
  corpsource      = {German Nat. Res. Center for Comput. Sci., St.
    Augustin, Germany},
  keywords        = {address-spaces; API; application program interface;
    application program interfaces; computational power;
    dedicated neighborhoods; digital simulation; dynamic
    process sets; fluids codes; industrial simulation
    applications; message passing; message passing
    interface; message passing libraries; MPI; MPI-WORLDs;
    parallel systems; portability; software libraries;
    software portability; standalone address-spaces;
    storage management; structures codes; synchronisation},
  sponsororg      = {IEEE Comput. Soc. Tech. Committee on Distributed
    Process},
  treatment       = {P Practical},
}
@InProceedings{Worley:1996:MPE,
  author          = {P. H. Worley},
  title           = {{MPI} performance evaluation and characterization
    using a compact application benchmark code},
  crossref        = {IEEE:1996:PSM},
  pages           = {170--177},
  year            = {1996},
  bibdate         = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C5440 (Multiprocessing systems); C6150N (Distributed
    systems software)},
  conftitle       = {Proceedings. Second MPI Developer's Conference},
  corpsource      = {Oak Ridge Nat. Lab., TN, USA},
  keywords        = {application program interface; application program
    interfaces; communication library; communication
    protocol; communication routines;
    communication-intensive application code; compact
    application benchmark code; Cray Research T3D; IBM SP2;
    Intel Paragon; message passing; message passing
    standard; MPI; parallel benchmark code; parallel
    machines; PSTSWM; software libraries; software
    performance evaluation; software standards},
  sponsororg      = {IEEE Comput. Soc. Tech. Committee on Distributed
    Process},
  treatment       = {P Practical},
}
@InProceedings{Xiong:1996:BID,
  author          = {Jianxin Xiong and Dingxing Wang and Weimin Zheng and
    Meiming Shen},
  title           = {{BUSTER}: an integrated debugger for {PVM}},
  crossref        = {IEEE:1996:PIS},
  pages           = {},
  year            = {1996},
  bibdate         = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C5620L (Local area networks); C6110P (Parallel
    programming); C6115 (Programming support); C6150G
    (Diagnostic, testing, debugging and evaluating
    systems); C6150N (Distributed systems software)},
  corpsource      = {Dept. of Comput. Sci. and Technol., Tsinghua Univ.,
    Beijing, China},
  keywords        = {BUSTER debugger; communication; debugging; global
    states; integrated parallel debugger; local area
    networks; nondeterminism; parallel; parallel
    programming; performance debugging; program;
    programming environments; PVM programming environment;
    related errors; workstation clusters; workstation
    network; workstations},
  treatment       = {P Practical},
}
@Article{Xu:1996:MCO,
  author          = {Zhiwei Xu and Kai Hwang},
  title           = {Modeling Communication Overhead: {MPI} and {MPL}
    Performance on the {IBM SP2}},
  journal         = j-IEEE-PAR-DIST-TECH,
  volume          = {4},
  number          = {1},
  pages           = {9--24},
  month           = {Spring},
  year            = {1996},
  CODEN           = {IPDTEX},
  DOI             = {https://doi.org/10.1109/88.481662},
  ISSN            = {1063-6552 (print), 1558-1861 (electronic)},
  ISSN-L          = {1063-6552},
  bibdate         = {Thu Apr 10 19:14:33 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C5440 (Multiprocessing systems); C5470 (Performance
    evaluation and testing)},
  corpsource      = {Acad. Sinica, Beijing, China},
  fjournal        = {IEEE parallel and distributed technology: systems and
    applications},
  keywords        = {architectural bottlenecks; collective; collective
    communication; collective computation; communication;
    communication overhead modelling; communication
    performance evaluation; computation; IBM computers; IBM
    Message-Passing; IBM Message-Passing Library; IBM SP2;
    Library; machine size; massively parallel; massively
    parallel processors; message length; message passing;
    Message-Passing Interface; message-passing
    multicomputers; message-passing operations; MPI
    performance; MPL; MPL performance; optimization;
    overhead-; overhead- quantifying method; parallel
    applications; parallel applications optimization;
    parallel machines; performance; performance evaluation;
    point-to-point; point-to-point communication;
    processors; quantifying method; timing; timing
    measurements},
  treatment       = {X Experimental},
}
@Article{Yoon:1996:WBP,
  author          = {D.-K. Yoon and J.-L. Gaudiot},
  title           = {Worker-Based Parallel Computing on {PVM}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1123},
  pages           = {506--??},
  month           = {????},
  year            = {1996},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C5440 (Multiprocessing systems); C6150N (Distributed
    systems software)},
  corpsource      = {Dept. of Electr. Eng. Syst., Univ. of Southern
    California, Los Angeles, CA, USA},
  fjournal        = {Lecture Notes in Computer Science},
  keywords        = {asynchronous tasks; high performance computing;
    message passing; networks of workstations; parallel
    function calls; parallel processing; parallel
    processing subsystem; Parallel Virtual Machine;
    primitives; run-time system; software package; software
    packages; user application programs; worker-based
    parallel computing},
  pubcountry      = {Germany},
  treatment       = {P Practical},
  xxpages         = {2--??},
  xxvolume        = {1},
}
@Article{Yuasa:1996:RPG,
  author          = {F. Yuasa and S. Kawabata and T. Ishikawa and D.
    Perret-Gallix and T. Kaneko},
  title           = {Running {PVM-GRACE} on Workstation Clusters},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1156},
  pages           = {335--??},
  month           = {????},
  year            = {1996},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C6150N (Distributed systems software); C7320
    (Physics and chemistry computing)},
  corpsource      = {Nat. Lab. for High Energy Phys., Ibaraki, Japan},
  fjournal        = {Lecture Notes in Computer Science},
  keywords        = {automatic Feynman diagram computation; event
    generation; Feynman diagrams; high energy physics;
    parallel machines; parallel virtual machine; physics
    computing; PVM-GRACE; software packages; software
    packet; virtual machines; workstation clusters},
  pubcountry      = {Germany},
  treatment       = {A Application; P Practical},
}
@InProceedings{Zambonelli:1996:EPP,
  author          = {F. Zambonelli and M. Pugassi and L. Leonardi and N.
    Scarabottolo},
  title           = {Experiences on porting a {Parallel Objects}
    environment from a transputer network to a {PVM-based}
    system},
  crossref        = {IEEE:1996:PFE},
  pages           = {},
  year            = {1996},
  bibdate         = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C6110B (Software engineering techniques); C6110J
    (Object-oriented programming); C6110P (Parallel
    programming); C6115 (Programming support); C6150N
    (Distributed systems software)},
  corpsource      = {Dipartimento di Elettronica Inf. e Sistemistica,
    Bologna Univ., Italy},
  keywords        = {computer aided software engineering; heterogeneous
    computer networks; massively parallel architecture;
    object-oriented; parallel object-oriented programming;
    Parallel Objects; parallel programming; programming;
    programming environments; PVM environment; software
    portability},
  treatment       = {P Practical},
}
@TechReport{Zhou:1996:FMP,
  author          = {Honbo Zhou and Al Geist},
  title           = {Faster Message Passing in {PVM}},
  institution     = {Mathematical Sciences Section, } # inst-ORNL,
  address         = inst-ORNL:adr,
  year            = {1996},
  pages           = {7},
  bibdate         = {Tue Jan 16 08:18:15 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.epm.ornl.gov/~zhou/patm.ps},
}
@Article{Adamo:1997:AOO,
  author          = {J.-M. Adamo},
  title           = {{ARCH}, An Object Oriented {MPI}-Based Library for
    Asynchronous and Loosely Synchronous Parallel System
    Programming},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {67--74},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Aguilar:1997:PMS,
  author          = {J. Aguilar and T. Jimenez},
  title           = {A Processors Management System for {PVM}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1300},
  pages           = {158--??},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Tue Apr 28 08:51:33 MDT 1998},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Ahmad:1997:EVP,
  author          = {Ishfaq Ahmad},
  title           = {{Express} versus {PVM}: a performance comparison},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {23},
  number          = {6},
  pages           = {783--812},
  day             = {20},
  month           = jun,
  year            = {1997},
  CODEN           = {PACOEJ},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Fri Aug 6 10:15:27 MDT 1999},
  bibsource       = {Compendex database;
    http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1997&volume=23&issue=6;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1997&volume=23&issue=6&aid=1138},
  acknowledgement = ack-nhfb,
  affiliation     = {Hong Kong Univ of Science and Technology},
  affiliationaddress = {Kowloon, Hong Kong},
  classification  = {716.1; 722.4; 723; 723.1},
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
  journalabr      = {Parallel Comput},
  keywords        = {Computer programming; Computer software portability;
    Computer workstations; Data communication systems;
    Hypercube computers; Interprocessor communication;
    Parallel algorithms; Parallel processing systems;
    Parallel virtual machine},
}
@Article{Alexandrov:1997:PMC,
  author          = {V. Alexandrov and K. Chan and A. Gibbons and W.
    Rytter},
  title           = {On the {PVM\slash MPI} Computations of Dynamic
    Programming Recurrences},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {305--312},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Alfaro:1997:FDW,
  author          = {F. J. Alfaro and J. A. Gallud and J. L. Sanchez},
  title           = {A Function to Dynamic Workload Allocation in
    Distributed Applications},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {219--225},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Alonso:1997:PBB,
  author          = {J. L. Alonso and H. Schmidt and V. N. Alexandrov},
  title           = {Parallel Branch and Bound Algorithms for Integer and
    Mixed Integer Linear Programming Problems under {PVM}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {313--320},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
%%% NOTE(review): this entry appears to describe the same article as
%%% Bramley:1997:TNR (identical title, journal, volume, number, month,
%%% and URL).  The end page below is taken from that entry; confirm
%%% whether both keys need to be kept.
@Article{Anonymous:1997:TNR,
  author          = {Anonymous},
  title           = {Technology News \& Reviews: {Chemkin} software;
    {OpenMP Fortran Standard}; {ODE} Toolbox for {Matlab};
    {Java} products; {Scientific WorkPlace 3.0}},
  journal         = j-IEEE-COMPUT-SCI-ENG,
  volume          = {4},
  number          = {4},
  pages           = {75--78},
  month           = oct # {\slash } # dec,
  year            = {1997},
  CODEN           = {ISCEE4},
  ISSN            = {1070-9924 (print), 1558-190X (electronic)},
  ISSN-L          = {1070-9924},
  bibdate         = {Sat Jan 9 08:57:23 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/matlab.bib;
    http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://dlib.computer.org/cs/books/cs1997/pdf/c4075.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {IEEE Computational Science \& Engineering},
  journal-URL     = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=99},
}
@Article{Aversa:1997:MDP,
  author          = {R. Aversa and G. Iannello and N. Mazzocca},
  title           = {An {MPI} Driven Parallelization Strategy for Different
    Computing Platforms: a Case Study},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {401--408},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Bala:1997:PVQ,
  author          = {P. Bala and T. Clark and P. Grochowski and B. Lesyng},
  title           = {Parallel Version of a Quantum Classical Molecular
    Dynamics Code for Complex Molecular and Biomolecular
    Systems},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {409--416},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Baraglia:1997:IPW,
  author          = {R. Baraglia and M. Cosso and D. Laforenza and M.
    Nicosia},
  title           = {Integrating {PVaniM} into {WAMM} for Monitoring
    Meta-Applications},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {226--233},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Barbosa:1997:EUW,
  author          = {J. G. Barbosa and A. J. Padilha and J.-P. Madier and
    T. Neubert},
  title           = {Experiments on Using {WPVM} for Industrial Visual
    Inspection Problems},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1300},
  pages           = {828--??},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Tue Apr 28 08:51:33 MDT 1998},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
%%% NOTE(review): sponsororg value "SPPEDUP Soc" below is possibly a
%%% typo for "SPEEDUP Soc" -- confirm against the proceedings before
%%% changing it.
@InProceedings{Beazley:1997:EMP,
  author          = {D. M. Beazley and P. S. Lomdahl},
  title           = {Extensible message passing application development and
    debugging with {Python}},
  crossref        = {IEEE:1997:PIP},
  pages           = {650--655},
  year            = {1997},
  bibdate         = {Thu May 21 19:02:04 MDT 1998},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification  = {C5220P (Parallel architecture); C5440 (Multiprocessing
    systems); C6110P (Parallel programming); C6115
    (Programming support); C6140D (High level languages);
    C6150C (Compilers, interpreters and other processors);
    C6150G (Diagnostic, testing, debugging and evaluating
    systems); C6150N (Distributed systems software)},
  conftitle       = {Proceedings 11th International Parallel Processing
    Symposium},
  corpsource      = {Dept. of Comput. Sci., Utah Univ., Salt Lake City, UT,
    USA},
  keywords        = {application specific debugging; CM-5; Cray T3D;
    extensible message passing application debugging;
    extensible message passing application development;
    interpreted object oriented scripting language;
    large-scale message passing applications; message
    passing; message passing program writing; molecular
    dynamics application; MPI; multiprocessing systems;
    object-oriented languages; parallel machines; parallel
    programming; program debugging; program interpreters;
    Python parallelisation; rapid prototyping; software
    prototyping; Sun multiprocessor servers},
  sponsororg      = {IEEE Comput. Soc. Tech. Committee on Parallel
    Process.; ACM SIGARCH; Eur. Assoc. Theor. Comput. Sci.
    (EATCS); Swiss Special Interest Group on Parallelism
    (SIPAR); SPPEDUP Soc},
  treatment       = {P Practical},
}
@Article{Beisel:1997:EMD,
  author          = {T. Beisel and E. Gabriel and M. Resch},
  title           = {An Extension to {MPI} for Distributed Computing on
    {MPPs}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {75--82},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Bendtsen:1997:RLS,
  author          = {C. Bendtsen and Z. Zlatev},
  title           = {Running Large-Scale Air Pollution Models on Message
    Passing Machines},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {417--426},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Bhandarkar:1997:CRP,
  author          = {Suchendra M. Bhandarkar and Salem Machaka},
  title           = {Chromosome Reconstruction from Physical Maps Using a
    Cluster of Workstations},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {11},
  number          = {1},
  pages           = {61--86},
  month           = mar,
  year            = {1997},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1023/A:1007913429509},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Wed Jul 6 12:13:06 MDT 2005},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=11&issue=1;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.wkap.nl/issuetoc.htm/0920-8542+11+1+1997},
  URL             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=11&issue=1&spage=61;
    http://www.wkap.nl/oasis.htm/141471},
  acknowledgement = ack-nhfb,
  classification  = {C1180 (Optimisation techniques); C6110P (Parallel
    programming); C6150N (Distributed systems software);
    C7330 (Biology and medical computing); C7430 (Computer
    engineering)},
  corpsource      = {Dept. of Comput. Sci., Georgia Univ., Athens, GA,
    USA},
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
  keywords        = {Aspergillus nidulans; biology computing; central
    computational problem; Chromosome IV; chromosome
    reconstruction; classical NP complete optimal linear
    arrangement problem; clonal data; clone ordering;
    genetics; genomic library; heterogeneous collection;
    Markov chain decomposition; microcanonical annealing;
    networked computers; parallel algorithms; Parallel
    Virtual Machine; physical maps; PVM system; simulated
    annealing; single monolithic parallel computing
    resource; software system; Unix workstations; virtual
    machines; workstation cluster},
  pubcountry      = {Netherlands},
  treatment       = {P Practical},
}
%%% NOTE(review): the ack-rfb and ack-kr macros used below are not among
%%% the String definitions visible at the top of this file -- confirm
%%% that they are defined before this entry.  This entry also lacks the
%%% bibdate field that sibling entries carry.
@Article{Blackford:1997:PEN,
  author          = {L. S. Blackford and A. Cleary and A. Petitet and R. C.
    Whaley and J. Demmel and I. Dhillon and H. Ren and K.
    Stanley and J. Dongarra and S. Hammarling},
  title           = {Practical Experience in the Numerical Dangers of
    Heterogeneous Computing},
  journal         = j-TOMS,
  volume          = {23},
  number          = {2},
  pages           = {133--147},
  month           = jun,
  year            = {1997},
  CODEN           = {ACMSCU},
  DOI             = {https://doi.org/10.1145/264029.264030},
  ISSN            = {0098-3500 (print), 1557-7295 (electronic)},
  ISSN-L          = {0098-3500},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.acm.org/pubs/citations/journals/toms/1997-23-2/p133-blackford/},
  abstract        = {Special challenges exist in writing reliable numerical
    library software for heterogeneous computing
    environments. Although a lot of software for
    distributed-memory parallel computers has been written,
    porting this software to a network of workstations
    requires careful consideration. The symptoms of
    heterogeneous computing failures can range from
    erroneous results without warning to deadlock. Some of
    the problems are straightforward to solve, but for
    others the solutions are not so obvious, or incur an
    unacceptable overhead. Making software robust on
    heterogeneous systems often requires additional
    communication. We describe and illustrate the problems
    encountered during the development of ScaLAPACK and the
    NAG Numerical PVM Library. Where possible, we suggest
    ways to avoid potential pitfalls, or if that is not
    possible, we recommend that the software not be used on
    heterogeneous networks.},
  acknowledgement = ack-rfb # { and } # ack-kr,
  fjournal        = {ACM Transactions on Mathematical Software},
  journal-URL     = {http://dl.acm.org/pub.cfm?id=J782},
  keywords        = {distributed-memory systems; floating-point arithmetic;
    heterogeneous processor networks; message passing;
    numerical software; reliability},
  subject         = {{\bf D.1.3} Software, PROGRAMMING TECHNIQUES,
    Concurrent Programming, Distributed programming. {\bf
    G.1.0} Mathematics of Computing, NUMERICAL ANALYSIS,
    General, Computer arithmetic. {\bf G.1.0} Mathematics
    of Computing, NUMERICAL ANALYSIS, General, Parallel
    algorithms.},
}
@Article{Blaheta:1997:PIP,
  author          = {R. Blaheta and O. Jakl and J. Stary},
  title           = {{PVM}-Implementation of the {PCG} Method with
    Displacement Decomposition},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {321--328},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Bozas:1997:PED,
  author          = {G. Bozas and M. Fleischhauer and S. Zimmermann},
  title           = {{PVM} Experiences in Developing the {MIDAS} Parallel
    Database System},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {427--434},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Bramley:1997:TNR,
  author          = {Randall Bramley},
  title           = {Technology News \& Reviews: {Chemkin} software;
    {OpenMP Fortran Standard}; {ODE} Toolbox for {Matlab};
    {Java} products; {Scientific WorkPlace 3.0}},
  journal         = j-IEEE-COMPUT-SCI-ENG,
  volume          = {4},
  number          = {4},
  pages           = {75--78},
  month           = oct # {\slash } # dec,
  year            = {1997},
  CODEN           = {ISCEE4},
  ISSN            = {1070-9924 (print), 1558-190X (electronic)},
  ISSN-L          = {1070-9924},
  bibdate         = {Sat Jan 9 08:57:23 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
    http://www.math.utah.edu/pub/tex/bib/ieeecomputscieng.bib;
    http://www.math.utah.edu/pub/tex/bib/java.bib;
    http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://dlib.computer.org/cs/books/cs1997/pdf/c4075.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {IEEE Computational Science \& Engineering},
  journal-URL     = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=99},
}
@Article{Bruck:1997:EMP,
  author          = {Jehoshua Bruck and Danny Dolev and Ching-Tien Ho and
    Marcel-C{\u{a}}t{\u{a}}lin Ro{\c{s}}u and Ray Strong},
  title           = {Efficient Message Passing Interface ({MPI}) for
    Parallel Computing on Clusters of Workstations},
  journal         = j-J-PAR-DIST-COMP,
  volume          = {40},
  number          = {1},
  pages           = {19--34},
  day             = {10},
  month           = jan,
  year            = {1997},
  CODEN           = {JPDCER},
  DOI             = {https://doi.org/10.1006/jpdc.1996.1267},
  ISSN            = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L          = {0743-7315},
  bibdate         = {Thu Mar 9 09:19:01 MST 2000},
  bibsource       = {http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1267/production;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1267/production/pdf;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1267/production/ref},
  acknowledgement = ack-nhfb,
  classification  = {C5220P (Parallel architecture); C5440
    (Multiprocessing systems); C5620L (Local area
    networks); C6110P (Parallel programming); C6115
    (Programming support)},
  corpsource      = {California Inst. of Technol., Pasadena, CA, USA},
  fjournal        = {Journal of Parallel and Distributed Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/07437315},
  keywords        = {clusters of; collective communication functionality;
    industrial; level reliable transport protocol; local
    area networks; message passing; message passing
    interface; MPI-CCL layer; parallel computing; parallel
    programming; parallel programming environments;
    point-to-point communication; programming environments;
    standard; standards; user-; workstations},
  treatment       = {A Application; P Practical},
}
@Article{Brune:1997:HMP,
  author          = {Matthias Brune and J{\"o}rn Gehring and Alexander
    Reinefeld},
  title           = {Heterogeneous Message Passing and a Link to Resource
    Management},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {11},
  number          = {4},
  pages           = {355--369},
  month           = dec,
  year            = {1997},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1023/A:1007966723231},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Wed Jul 6 12:13:07 MDT 2005},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=11&issue=4;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.wkap.nl/issuetoc.htm/0920-8542+11+4+1997},
  URL             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=11&issue=4&spage=355;
    http://www.wkap.nl/oasis.htm/147011},
  acknowledgement = ack-nhfb,
  classification  = {C5640 (Protocols); C6150J (Operating systems);
    C6150N (Distributed systems software)},
  corpsource      = {Paderborn Univ., Germany},
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
  keywords        = {communication protocols; computer resource management;
    message passing; MPI; parallel process communication;
    portability; process mapping; processor scheduling;
    protocols; PVM; resource allocation; resource
    management},
  pubcountry      = {Netherlands},
  treatment       = {T Theoretical or Mathematical},
}
@Article{Bubak:1997:EPA,
  author          = {M. Bubak and W. Funika and J. Moscinski},
  title           = {Evaluation of Parallel Application's Behavior in
    Message Passing Environment},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {234--241},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Carvalho:1997:PCC,
  author          = {L. M. R. Carvalho and J. M. L. M. Palma},
  title           = {Parallelization of a {CFD} Code Using {PVM} and Domain
    Decomposition Techniques},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1215},
  pages           = {247--??},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Aug 22 11:59:49 MDT 1997},
  bibsource       = {ftp://ftp.math.utah.edu/pub/bibnet/subjects/domain-decomp.bib;
    http://www.math.utah.edu/pub/tex/bib/lncs1997a.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Ciegis:1997:NID,
  author          = {R. Ciegis and R. Sablinskas and J. Wasniewski},
  title           = {Numerical Integration on Distributed-Memory Parallel
    Systems},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {329--336},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Clematis:1997:DNL,
  author          = {A. Clematis and A. Coda and M. Spagnuolo and M.
    Mineter},
  title           = {Developing Non-Local Iterative Parallel Algorithms for
    {GIS} on {Cray T3D} Using {MPI}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {435--442},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Cotronis:1997:MPP,
  author          = {J. Y. Cotronis},
  title           = {Message-Passing Program Development by Ensemble},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {242--249},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Demuynck:1997:DOD,
  author          = {K. Demuynck and J. Broeckhove and F. Arickx},
  title           = {Dynamic Optimization of a Distributed {VR} System by
    Network-Balancing},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {443--450},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Derakhshan:1997:PEP,
  author          = {M. Derakhshan and S. Hammarling and A. Krommer},
  title           = {{PINEAPL}: a {European} Project on {Parallel
    Industrial Numerical Applications and Portable
    Libraries}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {337--342},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{DiMartino:1997:IPD,
  author          = {B. {Di Martino} and A. Mazzeo and N. Mazzocca and U.
    Villano},
  title           = {Interaction Patterns Detection in {PVM} Programs to
    Support Simulation},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {250--256},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{DiMartino:1997:MDH,
  author          = {V. {Di Martino} and G. Ruocco},
  title           = {Molecular Dynamics on Hybrid Memory Machines},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1332},
  pages           = {451--456},
  year            = {1997},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Dec 9 06:27:54 MST 1999},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{DiNapoli:1997:DCA,
  author = {C. {Di Napoli} and M. Giordano and M. M. Furnari},
  title = {Distributed and Cooperative Applications in {PVM}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {83--90},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@article{Dongarra:1997:BCA,
  author = {J. J. Dongarra and F. Desprez and A. Petitet and C.
    Randriamaro},
  title = {Block-Cyclic Array Redistribution on Networks of
    Workstations},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {343--350},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@inproceedings{Dongarra:1997:CSD,
  author = {J. J. Dongarra and S. Hammarling and A. Petitet},
  title = {Case studies on the development of {ScaLAPACK} and the
    {NAG} Numerical {PVM} Library},
  crossref = {Boisvert:1997:QNS},
  pages = {236--248},
  year = 1997,
  bibdate = {Thu Sep 16 09:48:36 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.netlib.org/utk/papers/woco96/woco96.html;
    http://www.netlib.org/utk/papers/woco96/woco96.ps;
    http://www.netlib.org/utk/people/JackDongarra/pdf/woco96.pdf},
  acknowledgement = ack-nhfb,
}
%%% NOTE(review): the page range of this paper is not recorded. The
%%% "??--??" placeholder replaces an empty pages field and matches the
%%% unknown-value convention already used in this file; fill in the real
%%% range from the proceedings when it becomes available.
@InProceedings{Dou:1997:ISV,
author = "Yong Dou and Zhengbing Pang and Xingming Zhou",
title = "Implementing a software virtual shared memory on
{PVM}",
crossref = "IEEE:1997:APD",
pages = "??--??",
year = "1997",
bibdate = "Wed Apr 16 06:39:19 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "C6110P (Parallel programming); C6115 (Programming
support); C6120 (File organisation); C6140D (High level
languages); C7430 (Computer engineering)",
corpsource = "Dept. of Comput. Sci., Changsha Inst. of Technol.,
Hunan, China",
keywords = "distributed; FORTRAN; FORTRAN language; GKD-VSM;
memory environments; multithread scheme; parallel
programming; parallel programming model; Prefetch and
Poststore; programming environments; PVM; shared
memory; software overhead; software virtual shared
memory; synchronisation; user-level; virtual machines;
virtual storage",
treatment = "P Practical",
}
%%% NOTE(review): the end page was recorded as 435, which precedes the
%%% start page 457. The value 462 is inferred from the page ranges of
%%% the neighbouring papers in LNCS volume 1332 (451--456 before,
%%% 470--477 after); confirm against the volume's table of contents.
@Article{Exbrayat:1997:OPS,
author = "M. Exbrayat and H. Kosch",
title = "Offering Parallelism to a Sequential Database
Management System on a Network of Workstations Using
{PVM}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1332",
pages = "457--462",
year = "1997",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Thu Dec 9 06:27:54 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@article{Fachat:1997:IEB,
  author = {Andr{\'e} Fachat and Karl Heinz Hoffmann},
  title = {Implementation of {Ensemble-Based Simulated Annealing}
    with dynamic load balancing under {MPI}},
  journal = j-COMP-PHYS-COMM,
  volume = 107,
  number = {1--3},
  pages = {49--53},
  month = dec,
  year = 1997,
  coden = {CPHCBZ},
  doi = {https://doi.org/10.1016/S0010-4655(97)00096-9},
  issn = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l = {0010-4655},
  bibdate = {Mon Feb 13 21:30:21 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0010465597000969},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-url = {http://www.sciencedirect.com/science/journal/00104655},
}
@techreport{Fagg:1997:HMAa,
  author = {G. Fagg and J. Dongarra and A. Geist},
  title = {Heterogeneous {MPI} Application Interoperation and
    Process Management under {PVMPI}},
  type = {Technical report},
  number = {CS-97-???},
  institution = inst-UTK,
  address = inst-UTK:adr,
  month = jun,
  year = 1997,
  bibdate = {Tue Feb 26 10:10:44 2002},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.netlib.org/utk/papers/pvmmpi97.ps;
    http://www.netlib.org/utk/people/JackDongarra/pdf/pvmmpi97.pdf},
  acknowledgement = ack-nhfb,
}
@article{Fagg:1997:HMAb,
  author = {G. E. Fagg and J. J. Dongarra and A. Geist},
  title = {Heterogeneous {MPI} Application Interoperation and
    Process Management under {PVMPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {91--98},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@article{Fang:1997:MDD,
  author = {Niandong Fang and Helmar Burkhart},
  title = {{MPI-DDL}: a distributed-data library for {MPI}},
  journal = j-FUT-GEN-COMP-SYS,
  volume = 12,
  number = 5,
  pages = {407--419},
  day = 1,
  month = apr,
  year = 1997,
  coden = {FGSEVI},
  issn = {0167-739X (print), 1872-7115 (electronic)},
  issn-l = {0167-739X},
  bibdate = {Wed Feb 27 12:41:16 MST 2002},
  bibsource = {http://www.elsevier.com/locate/issn/0167739X;
    http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.elsevier.com/gej-ng/10/19/19/27/17/23/abstract.html},
  acknowledgement = ack-nhfb,
  fjournal = {Future Generation Computer Systems},
  journal-url = {http://www.sciencedirect.com/science/journal/0167739X},
}
@techreport{Fin:1997:CPM,
  author = {Torsten Fin},
  title = {Comparing the performance of {MPI}, {PVM}, and {CORBA}
    on {Ethernet LANs}},
  type = {{Berichte zur Rechnerarchitektur}},
  number = {3(4)},
  institution = {Institut f{\"u}r Informatik, Lehrstuhl f{\"u}r
    Rechnerarchitektur und -kommunikation,
    Friedrich-Schiller-Universit{\"a}t Jena},
  address = {Jena, Germany},
  pages = 12,
  year = 1997,
  bibdate = {Wed Aug 27 06:51:17 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
%%% NOTE(review): the conference date in booktitle was given as 1994,
%%% which contradicts both year = 1997 and the Windows 95/NT subject of
%%% the paper; it now reads 1997. The conference name, editor, publisher,
%%% address, and page range remain unverified placeholders.
@InProceedings{Fischer:1997:AAP,
author = "Markus Fischer and Jack Dongarra",
editor = "????",
booktitle = "{Concurrent Computing Conference, Atlanta, GA, March
10--11, 1997}",
title = "Another Architecture: {PVM} on {Windows 95\slash NT}",
publisher = "????",
address = "????",
pages = "??--??",
year = "1997",
bibdate = "Tue Feb 26 10:10:44 2002",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.netlib.org/utk/people/JackDongarra/PAPERS/nt-paper.ps;
http://www.netlib.org/utk/people/JackDongarra/pdf/nt-paper.pdf",
acknowledgement = ack-nhfb,
}
@article{Fischer:1997:ESP,
  author = {M. Fischer and J. Simon},
  title = {Embedding {SCI} into {PVM}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {177--184},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@article{Foster:1997:MMC,
  author = {Ian Foster and Jonathan Geisler and Carl Kesselman and
    Steven Tuecke},
  title = {Managing Multiple Communication Methods in
    High-Performance Networked Computing Systems},
  journal = j-J-PAR-DIST-COMP,
  volume = 40,
  number = 1,
  pages = {35--48},
  day = 10,
  month = jan,
  year = 1997,
  coden = {JPDCER},
  doi = {https://doi.org/10.1006/jpdc.1996.1266},
  issn = {0743-7315 (print), 1096-0848 (electronic)},
  issn-l = {0743-7315},
  bibdate = {Thu Mar 9 09:19:01 MST 2000},
  bibsource = {http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1266/production;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1266/production/pdf;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1266/production/ref},
  acknowledgement = ack-nhfb,
  classification = {B6150M (Protocols); B6210L (Computer
    communications); C5440 (Multiprocessing systems); C5470
    (Performance evaluation and testing); C5640
    (Protocols); C5670 (Network performance)},
  corpsource = {Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
    USA},
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/07437315},
  keywords = {Argonne MPICH library; computer networks; computing
    systems; criteria; heterogeneous networked environment;
    high-performance networked; message passing; message
    passing interface; multimethod communication; multiple
    communication methods; multithreaded runtime system;
    networked computing environments; Nexus; Nexus-based
    MPI implementation; performance characteristics;
    performance evaluation; protocols; remote service
    request mechanisms; transport mechanisms;
    user-specified selection},
  treatment = {P Practical},
}
%%% NOTE(review): the English gloss in the title previously read
%%% "distributed MPI systems"; the German original says "MPP-Systeme",
%%% so the gloss now says MPP.
@TechReport{Gabriel:1997:EMU,
author = "Edgar Gabriel and Thomas Beisel and Michael Resch",
title = "{Erweiterung einer MPI-Umgebung zur
Interoperabilit{\"a}t verteilter MPP-Systeme}.
({German}) [{Extension} of an {MPI} environment for
interoperability with distributed {MPP} systems]",
type = "{Studienarbeit angewandte Informatik}",
number = "RUS 37",
institution = "Rechenzentrum Universit{\"a}t Stuttgart",
address = "Stuttgart, Germany",
year = "1997",
bibdate = "Wed Aug 27 06:55:46 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
language = "German",
}
%%% NOTE(review): the start page was recorded as 436, which would make
%%% this paper overlap many earlier papers in LNCS volume 1332. The value
%%% 463 (a transposition of the last two digits) is inferred from the
%%% neighbouring page ranges; confirm against the volume's table of
%%% contents.
@Article{Galaktionov:1997:MST,
author = "A. S. Galaktionov and P. D. Anderson and G. W. M.
Peters",
title = "Mixing Simulations: Tracking Strongly Deforming Fluid
Volumes in {3D} Flows",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1332",
pages = "463--469",
year = "1997",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Thu Dec 9 06:27:54 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@article{Galibert:1997:YCL,
  author = {O. Galibert},
  title = {{YLC}, {A C++ Linda} System on Top of {PVM}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {99--106},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@article{GarciaSalcines:1997:PRR,
  author = {E. {Garcia Salcines} and G. {Cerruela Garcia} and J.
    I. {Benavides Benitez} and F. {Mu{\~n}oz Garcia}},
  title = {Parallel Rendering of Radiance on Distributed Memory
    System by {PVM}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {502--507},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@article{Geist:1997:ACP,
  author = {G. A. Geist},
  title = {Advanced Capabilities in {PVM 3.4}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {107--115},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@article{Geist:1997:BPW,
  author = {G. A. Geist and J. A. Kohl and P. M. Papadopoulos and
    S. L. Scott},
  title = {Beyond {PVM 3.4}: What We've Learned, What's Next, and
    Why},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {116--126},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@article{Geist:1997:CPF,
  author = {G. A. {Geist, II} and James Arthur Kohl and Philip M.
    Papadopoulos},
  title = {{CUMULVS}: Providing Fault Tolerance, Visualization,
    and Steering of Parallel Applications},
  journal = j-IJSAHPC,
  volume = 11,
  number = 3,
  pages = {224--235},
  month = {Fall},
  year = 1997,
  coden = {IJSCFG},
  issn = {1078-3482},
  bibdate = {Wed Jul 23 11:38:50 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {International Journal of Supercomputer Applications
    and High Performance Computing},
}
@article{Gerlach:1997:ECS,
  author = {J. Gerlach and M. Sato and Y. Ishikawa},
  title = {Experiences with the {C++} Standard Template Library
    and {MPI} for a Parallel Particle Simulation Method},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1225,
  pages = {961--??},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Fri Aug 22 11:59:49 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@article{Gillett:1997:UMC,
  author = {Richard Gillett and Richard Kaufmann},
  title = {Using the {Memory Channel Network} --- Using a cluster
    of standard {PCI-based} servers with a low-cost network
    to improve communication performance},
  journal = j-IEEE-MICRO,
  volume = 17,
  number = 1,
  pages = {19--25},
  month = jan # "\slash " # feb,
  year = 1997,
  coden = {IEMIDZ},
  doi = {https://doi.org/10.1109/40.566189},
  issn = {0272-1732 (print), 1937-4143 (electronic)},
  issn-l = {0272-1732},
  bibdate = {Mon Apr 7 14:39:59 MDT 1997},
  bibsource = {Compendex database;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Digital Equipment Corp},
  affiliationaddress = {MA, USA},
  classcodes = {C5610N (Network interfaces); C5620 (Computer networks
    and techniques)},
  classification = {716.1; 722.1; 722.3; 722.4; 723.1; 723.2},
  corpsource = {Digital Equip. Corp., USA},
  fjournal = {IEEE Micro},
  journal-url = {http://www.computer.org/csdl/mags/mi/index.html},
  journalabr = {IEEE Micro},
  keywords = {Bandwidth; clusters; Coding errors; Communication
    channels (information theory); Computer networks;
    computer networks; Computer software; Data
    communication systems; Data handling; Data storage
    equipment; Data transfer; DEC computers; Digital;
    Latency; Lock acquisition; Lock release; Memory
    channel; Memory Channel; Memory Channel Network;
    Message passing; Message size; message-passing; network
    for; network interfaces; Parallel processing systems;
    PCI bus; Performance; Raw message passing; Storage
    allocation (computer); Universal message passing;
    UNIX},
  treatment = {P Practical},
}
@article{Goumopoulos:1997:PCS,
  author = {C. Goumopoulos and E. Housos and O. Liljenzin},
  title = {Parallel Crew Scheduling on Workstation Networks Using
    {PVM}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {470--477},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
%%% NOTE(review): two corrections to the title. The German word is
%%% spelled "Einflu{\ss}" (no umlaut), and the English gloss now
%%% translates "im 3D-Fall" as "in the 3D case" rather than leaving
%%% "Fall" untranslated.
@TechReport{Grabowsky:1997:MBK,
author = "Lothar Grabowsky",
title = "{MPI-basierte Koppelrandkommunikation und
Einflu{\ss} der Partitionierung im 3D-Fall}.
({German}) [{MPI}-based coupled edge communication and
influence of partitioning in the {3D} case]",
type = "Preprint-Reihe des Chemnitzer SFB 393",
number = "97,17",
institution = "Universit{\"a}t Chemnitz-Zwickau",
address = "Chemnitz, Germany",
pages = "13",
year = "1997",
bibdate = "Wed Aug 27 06:53:21 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
language = "German",
}
@article{Grecki:1997:MPE,
  author = {M. Grecki and G. Jablonski and A. Napieralski},
  title = {{MOPS} --- Parallel Environment for Simulation of
    Electronic Circuits Using Physical Models of
    Semiconductor Devices},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {478--485},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@article{Gropp:1997:HPM,
  author = {William Gropp and Ewing Lusk},
  title = {A high-performance {MPI} implementation on a
    shared-memory vector supercomputer},
  journal = j-PARALLEL-COMPUTING,
  volume = 22,
  number = 11,
  pages = {1513--1526},
  day = 26,
  month = jan,
  year = 1997,
  coden = {PACOEJ},
  issn = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l = {0167-8191},
  bibdate = {Fri Aug 6 10:14:43 MDT 1999},
  bibsource = {Compendex database;
    http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1997&volume=22&issue=11;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1997&volume=22&issue=11&aid=1113},
  acknowledgement = ack-nhfb,
  affiliation = {Argonne Natl Lab},
  affiliationaddress = {IL},
  classification = {722.1; 722.2; 722.4; 921.1; C5220P (Parallel
    architecture); C5610N (Network interfaces); C6150N
    (Distributed systems software)},
  corpsource = {Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
    USA},
  fjournal = {Parallel Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/01678191},
  journalabr = {Parallel Comput},
  keywords = {Data storage equipment; Interfaces (computer); message
    passing; Message passing interface (mpi);
    Message-Passing Interface; MPI implementation; MPIC;
    NEC SX-4; network interfaces; parallel; Parallel
    processing systems; Shared memory multiprocessors;
    shared memory systems; shared-memory programming;
    shared-memory vector supercomputer; standards;
    supercomputer; Supercomputers; Vectors},
  treatment = {P Practical},
}
@article{Gropp:1997:SMC,
  author = {W. Gropp and E. Lusk},
  title = {Sowing {MPICH}: a Case Study in the Dissemination of a
    Portable Environment for Parallel Scientific
    Computing},
  journal = j-IJSAHPC,
  volume = 11,
  number = 2,
  pages = {103--114},
  month = {Summer},
  year = 1997,
  coden = {IJSCFG},
  issn = {1078-3482},
  bibdate = {Thu Jun 26 18:17:48 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {International Journal of Supercomputer Applications
    and High Performance Computing},
}
@article{Gropp:1997:WPM,
  author = {W. Gropp and E. Lusk},
  title = {Why Are {PVM} and {MPI} So Different?},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {3--10},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@inproceedings{Guan:1997:PDI,
  author = {Huiwei Guan and Chi-kwong Li and To-yat Cheung and
    Songnian Yu},
  title = {Parallel design and implementation of {SOM} neural
    computing model in {PVM} environment of a distributed
    system},
  crossref = {IEEE:1997:APD},
  pages = {26--31},
  year = 1997,
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification = {C1230D (Neural nets); C5220P (Parallel
    architecture); C5290 (Neural computing techniques)},
  conflocation = {Shanghai, China; 19-21 March 1997},
  conftitle = {Proceedings. Advances in Parallel and Distributed
    Computing},
  corpsource = {Dept. of Comput. Sci., City Univ. of Hong Kong, Hong
    Kong},
  keywords = {architectures; distributed; machines; message passing;
    neural net architecture; parallel; parallel virtual
    machine; PVM environment; self-organising feature maps;
    SOM neural computing model; system; virtual},
  treatment = {T Theoretical or Mathematical},
}
@article{Hempel:1997:IMN,
  author = {R. Hempel and H. Ritzdorf and F. Zimmermann},
  title = {Implementation of {MPI} on {NEC}'s {SX-4} Multi-Node
    Architecture},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {185--193},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@article{Hoyos-Rivera:1997:UPB,
  author = {G. J. Hoyos-Rivera and V. G. Sanchez-Arias},
  title = {Using {PVM} to Build an Interface to Support
    Cooperative Work in a Distributed Systems Environment},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {127--134},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@inproceedings{Hwang:1997:EMC,
  author = {Kai Hwang and Choming Wang and Cho-Li Wang},
  title = {Evaluating {MPI} collective communication on the
    {SP2}, {T3D}, and {Paragon} multicomputers},
  crossref = {IEEE:1997:TIS},
  pages = {106--115},
  year = 1997,
  bibdate = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification = {C5220P (Parallel architecture); C5440
    (Multiprocessing systems); C5470 (Performance
    evaluation and testing)},
  conftitle = {Proceedings Third International Symposium on
    High-Performance Computer Architecture},
  corpsource = {Hong Kong Univ., Hong Kong},
  keywords = {architectural support; closed-form expressions; Cray
    T3D; IBM SP2; Intel Paragon; message passing; MPI
    collective communication evaluation; multiprocessing
    systems; Paragon multicomputers; performance
    evaluation; STAP benchmark experiments; startup
    latency; synchronisation; timing; timing performance},
  sponsororg = {IEEE Computer. Soc. Tech. Committee on Comput.
    Archit},
  treatment = {P Practical},
}
@article{Jabbarzadeh:1997:PSS,
  author = {A. Jabbarzadeh and J. D. Atkinson and R. I. Tanner},
  title = {Parallel simulation of shear flow of polymers between
    structured walls by molecular dynamics simulation on
    {PVM}},
  journal = j-COMP-PHYS-COMM,
  volume = 107,
  number = {1--3},
  pages = {123--136},
  month = dec,
  year = 1997,
  coden = {CPHCBZ},
  doi = {https://doi.org/10.1016/S0010-4655(97)00088-X},
  issn = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l = {0010-4655},
  bibdate = {Mon Feb 13 21:30:21 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S001046559700088X},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-url = {http://www.sciencedirect.com/science/journal/00104655},
}
@article{Jackson:1997:SYE,
  author = {D. J. Jackson and C. W. Humphres},
  title = {A simple yet effective load balancing extension to the
    {PVM} software system},
  journal = j-PARALLEL-COMPUTING,
  volume = 22,
  number = 12,
  pages = {1647--1660},
  day = 21,
  month = feb,
  year = 1997,
  coden = {PACOEJ},
  issn = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l = {0167-8191},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {Compendex database;
    http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1997&volume=22&issue=12;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1997&volume=22&issue=12&aid=1112},
  acknowledgement = ack-nhfb,
  classification = {C4140 (Linear algebra); C5260B (Computer vision and
    image processing techniques); C6110P (Parallel
    programming); C6150E (General utility programs); C6150N
    (Distributed systems software)},
  corpsource = {Dept. of Electr. Eng., Alabama Univ., Tuscaloosa, AL,
    USA},
  fjournal = {Parallel Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/01678191},
  keywords = {algorithms; application program interfaces; coding;
    data compression; host CPU load information; image;
    information gathering; load; load balancing; load
    balancing extension; master process; matrix algebra;
    matrix oriented; NAS parallel benchmarks; parallel;
    parallel algorithms; parallel fractal image compression
    algorithm; parallel runtime performance; process spawn;
    processes; programming; programming interface; PVM
    software system; resource allocation; slave},
  pubcountry = {Netherlands},
  treatment = {P Practical},
}
@article{Kacsuk:1997:GDD,
  author = {Peter Kacsuk and Jose C. Cunha and Gabor Dozsa and
    Joao Lourenco and Tibor Fadgyas and Tiago Antao},
  title = {A graphical development and debugging environment for
    parallel programs},
  journal = j-PARALLEL-COMPUTING,
  volume = 22,
  number = 13,
  pages = {1747--1770},
  day = 28,
  month = feb,
  year = 1997,
  coden = {PACOEJ},
  issn = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l = {0167-8191},
  bibdate = {Tue Oct 21 15:14:48 MDT 1997},
  bibsource = {Compendex database;
    http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1997&volume=22&issue=13;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1997&volume=22&issue=13&aid=1126},
  acknowledgement = ack-nhfb,
  affiliation = {Hungarian Acad of Sciences},
  affiliationaddress = {Budapest, Hung},
  classification = {722.2; 722.4; 723.1; 723.1.1; 723.5; C6110P
    (Parallel programming); C6110V (Visual programming);
    C6115 (Programming support); C6150G (Diagnostic,
    testing, debugging and evaluating systems); C6180G
    (Graphical user interfaces)},
  conference = {Proceedings of the 1996 Workshop on Distributed and
    Parallel Systems, DAPSYS},
  corpsource = {KFKI-MSZKI Res. Inst. for Meas. and Comput. Tech.,
    Hungarian Acad. of Sci., Budapest, Hungary},
  fjournal = {Parallel Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/01678191},
  journalabr = {Parallel Comput},
  keywords = {abstraction mechanism; complex programming
    environment; Computer aided software engineering;
    Computer programming; Computer programming languages;
    data visualization; debugging; debugging engine;
    debugging environment; distributed; Distributed
    computer systems; Distributed debugging engine;
    distributed memory computer architectures; GRADE;
    graphical; graphical development; graphical user
    interface; Graphical user interfaces; graphical user
    interfaces; GRAPNEL; high-level graphical support;
    language; languages; machine; message-; parallel;
    Parallel processing systems; parallel programming;
    Parallel programs; parallel virtual; Parallel virtual
    machine; passing parallel programs; performance
    monitoring; program; Program debugging; programming
    environments; programs; PROVE; Software Package grade;
    Software Package grapnel; software tools; Tape/PVM;
    visual},
  meetingaddress = {Miskolc, Hung},
  meetingdate = {Oct 1996},
  meetingdate2 = {10/96},
  treatment = {A Application; P Practical},
}
@article{Kitowski:1997:CPM,
  author = {J. Kitowski and K. Boryczko and J. Moscinski},
  title = {Comparison of {PVM} and {MPI} Performance in
    Short-Range Molecular Dynamics Simulation},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = 1332,
  pages = {11--16},
  year = 1997,
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@article{Konuru:1997:MUL,
  author = {Ravi B. Konuru and Steve W. Otto and Jonathan
    Walpole},
  title = {A Migratable User-Level Process Package for {PVM}},
  journal = j-J-PAR-DIST-COMP,
  volume = 40,
  number = 1,
  pages = {81--102},
  day = 10,
  month = jan,
  year = 1997,
  coden = {JPDCER},
  doi = {https://doi.org/10.1006/jpdc.1996.1270},
  issn = {0743-7315 (print), 1096-0848 (electronic)},
  issn-l = {0743-7315},
  bibdate = {Thu Mar 9 09:19:01 MST 2000},
  bibsource = {http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1270/production;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1270/production/pdf;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1270/production/ref},
  acknowledgement = ack-nhfb,
  classification = {C4240C (Computational complexity); C5440
    (Multiprocessing systems); C6110P (Parallel
    programming); C6115 (Programming support); C6150G
    (Diagnostic, testing, debugging and evaluating
    systems); C7430 (Computer engineering)},
  corpsource = {IBM Thomas J. Watson Res. Center, Yorktown Heights,
    NY, USA},
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/07437315},
  keywords = {adaptive load distribution; application debugging;
    application transparency; application-transparent
    migration; availability; based programming model;
    computational complexity; computing; distributed memory
    multiprocessor; dynamic environment; dynamic
    multiprocessor environment; machine; machines; message
    passing; message-; microbenchmarks; migratable
    user-level process package; parallel; parallel
    programming; parallel virtual; program debugging; PVM;
    system load; unobtrusive; unpredictable variability;
    user-level process; virtual; virtual processor;
    workstation; workstation networks; workstation
    ownership},
  treatment = {A Application; P Practical},
}
%%% NOTE(review): month set to apr. This is volume 2, number 2 of a
%%% quarterly journal, so the earlier value jan did not match the issue
%%% number; confirm against the ACM TODAES issue listing.
@Article{Kormicki:1997:PLS,
author = "Maciek Kormicki and Ausif Mahmood and Bradley S.
Carlson",
title = "Parallel logic simulation on a network of workstations
using parallel virtual machine",
journal = j-TODAES,
volume = "2",
number = "2",
pages = "123--134",
month = apr,
year = "1997",
CODEN = "ATASFO",
ISSN = "1084-4309 (print), 1557-7309 (electronic)",
ISSN-L = "1084-4309",
bibdate = "Fri Jul 27 10:05:33 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.acm.org/pubs/articles/journals/todaes/1997-2-2/p123-kormicki/p123-kormicki.pdf;
http://www.acm.org/pubs/citations/journals/todaes/1997-2-2/p123-kormicki/",
abstract = "This paper explores parallel logic simulation on a
network of workstations using a parallel virtual
machine (PVM). A novel parallel implementation of the
centralized-time event-driven logic simulation
algorithm is carried out such that no global
controlling workstation is needed to synchronize the
advance of simulation time. Further advantages of our
new approach include a random partitioning of the
circuit onto available workstations and a pipelined
execution of the different phases of the simulation
algorithm. To achieve a better load balance, we employ
a semioptimistic scheme for gate evaluations (in
conjunction with a centralized-time algorithm) such
that no rollback is required. The performance of this
implementation has been evaluated using the ISCAS
benchmark circuits. Speedups improve with the size of
the circuit and the activity level in the circuit.
Analyses of the communication overhead show that the
techniques developed here will yield even higher gains
as newer networking technologies like ATM are employed
to connect workstations.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Design Automation of Electronic
Systems (TODAES)",
generalterms = "Algorithms; Performance; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
keywords = "distributed computing; parallel logic simulation; PVM;
synchronous simulation",
subject = "Hardware --- Logic Design --- Design Aids (B.6.3):
{\bf Simulation}; Hardware --- Integrated Circuits ---
Design Aids (B.7.2): {\bf Simulation}",
}
@Article{Krantz:1997:CSC,
  author = {A. T. Krantz and V. S. Sunderam},
  title = {Client Server Computing on Message Passing Systems:
    Experiences with {PVM-RPC}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1300},
  pages = {110--??},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Apr 28 08:51:33 MDT 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Krotz-Vogel:1997:PPP,
  author = {W. Krotz-Vogel and H.-C. Hoppe},
  title = {The {PALLAS} Parallel Programming Environment},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {257--266},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Lauria:1997:MFH,
  author = {Mario Lauria and Andrew Chien},
  title = {{MPI-FM}: High Performance {MPI} on Workstation
    Clusters},
  journal = j-J-PAR-DIST-COMP,
  volume = {40},
  number = {1},
  pages = {4--18},
  day = {10},
  month = jan,
  year = {1997},
  CODEN = {JPDCER},
  DOI = {https://doi.org/10.1006/jpdc.1996.1264},
  ISSN = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L = {0743-7315},
  bibdate = {Thu Mar 9 09:19:01 MST 2000},
  bibsource = {http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1264/production;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1264/production/pdf;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.1996.1264/production/ref},
  acknowledgement = ack-nhfb,
  classification = {B6150M (Protocols); B6210L (Computer
    communications); C5440 (Multiprocessing systems); C5470
    (Performance evaluation and testing); C5620L (Local
    area networks); C5640 (Protocols); C5670 (Network
    performance)},
  corpsource = {Dipartimento di Inf. e Sistemistica, Naples Univ.,
    Italy},
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/07437315},
  keywords = {application level; bandwidth; communication layers;
    communication performance; communication pipeline
    stages; communication software; Cray; Cray T3D; design
    solutions; evaluation; fast messages library; hardware
    performance; high; high level messaging library; high
    performance MPI; high speed LANs; IBM SP2; interface;
    latency; level messaging layer; local area networks;
    low; low level; low level communication layers; low
    level messaging layer; message passing; message passing
    interface; minimum; minimum one-way latency; MPI-FM;
    Myrinet network; one-way latency; performance;
    performance evaluation; protocols; SPARCstation 20
    workstations; speed LANs; T3D; workstation clusters;
    workstations},
  treatment = {A Application; P Practical},
}
@InProceedings{Li:1997:EHC,
  author = {Konming Gary Li and Nabil M. Zamel},
  title = {An Evaluation of {HPF} Compilers and the
    Implementation of a Parallel Linear Equation Solver
    Using {HPF} and {MPI}},
  crossref = {ACM:1997:SHP},
  pages = {??--??},
  year = {1997},
  bibdate = {Sat Mar 21 08:51:09 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.supercomp.org/sc97/proceedings/TECH/LI/INDEX.HTM},
  acknowledgement = ack-nhfb,
}
@Article{Li:1997:PIO,
  author = {Wei Li and Xiaohu Huang and Nanning Zheng},
  title = {Parallel implementing {OpenGL} on {PVM}},
  journal = j-PARALLEL-COMPUTING,
  volume = {23},
  number = {12},
  pages = {1839--1850},
  day = {15},
  month = dec,
  year = {1997},
  CODEN = {PACOEJ},
  ISSN = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L = {0167-8191},
  bibdate = {Fri Aug 6 10:15:16 MDT 1999},
  bibsource = {Compendex database;
    http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1997&volume=23&issue=12;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_sub/browse/browse.cgi?year=1997&volume=23&issue=12&aid=1248},
  acknowledgement = ack-nhfb,
  affiliation = {Xi'an Jiaotong Univ},
  affiliationaddress = {Xi'an, China},
  classification = {722.4; 723.2},
  fjournal = {Parallel Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/01678191},
  journalabr = {Parallel Comput},
  keywords = {Algorithms; Data decomposition; Image processing; Load
    balancing; Parallel processing systems; Parallel
    virtual machine; Task granularity; Three dimensional;
    Virtual reality},
}
@Article{Lu:1997:QPD,
  author = {Honghui Lu and Sandhya Dwarkadas and Alan L. Cox and
    Willy Zwaenepoel},
  title = {Quantifying the Performance Differences between {PVM}
    and {TreadMarks}},
  journal = j-J-PAR-DIST-COMP,
  volume = {43},
  number = {2},
  pages = {65--78},
  day = {15},
  month = jun,
  year = {1997},
  CODEN = {JPDCER},
  DOI = {https://doi.org/10.1006/jpdc.1997.1332},
  ISSN = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L = {0743-7315},
  bibdate = {Thu Mar 9 09:19:03 MST 2000},
  bibsource = {http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.idealibrary.com/links/doi/10.1006/jpdc.1997.1332/production;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.1997.1332/production/pdf;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.1997.1332/production/ref},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Ludwig:1997:OUI,
  author = {T. Ludwig and R. Wismueller},
  title = {{OMIS 2.0} --- a Universal Interface for Monitoring
    Systems},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {267--276},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Luecke:1997:HPF,
  author = {G. R. Luecke and J. J. Coyle},
  title = {{High Performance Fortran} versus explicit message
    passing on the {IBM SP-2} for the parallel {LU}, {QR},
    and {Cholesky} factorizations},
  journal = j-SUPERCOMPUTER,
  volume = {13},
  number = {2},
  pages = {4--14},
  month = {????},
  year = {1997},
  CODEN = {SPCOEL},
  ISSN = {0168-7875},
  bibdate = {Wed Mar 18 08:37:01 MST 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification = {C4140 (Linear algebra); C5440 (Multiprocessing
    systems); C6110P (Parallel programming); C6150N
    (Distributed systems software); C7310 (Mathematics
    computing)},
  corpsource = {Iowa State Univ., Ames, IA, USA},
  fjournal = {Supercomputer},
  keywords = {BLACS; BLAS; Cholesky factorizations; ESSL library;
    explicit message passing; FORTRAN; High Performance
    Fortran; high-performance parallel implementations; IBM
    computers; IBM SP-2; LU factorizations; mathematics
    computing; matrix decomposition; message passing; MPI
    version; parallel computer; parallel languages;
    parallel machines; parallel programming; QR
    factorizations; SCALAPACK; software development;
    software libraries; software maintenance; software
    performance evaluation; Visual Numerics},
  pubcountry = {Netherlands},
  treatment = {P Practical},
}
@Article{Manegold:1997:QBM,
  author = {S. Manegold and F. Waas and D. Gudlat},
  title = {In Quest of the Bottleneck --- Monitoring Parallel
    Database Systems},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {277--284},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Mazzariol:1997:PCS,
  author = {M. Mazzariol and B. A. Gennart and V. Messerli and R.
    D. Hersch},
  title = {Performance of {CAP}-Specified Linear Algebra
    Algorithms},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {351--358},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{McDonald:1997:IPT,
  author = {Chris McDonald and Kamran Kazemi},
  title = {Improving the {PVM} teaching environment},
  journal = j-SIGCSE,
  volume = {29},
  number = {1},
  pages = {219--223},
  month = mar,
  year = {1997},
  CODEN = {SIGSD3},
  DOI = {https://doi.org/10.1145/268085.268167},
  ISSN = {0097-8418 (print), 2331-3927 (electronic)},
  ISSN-L = {0097-8418},
  bibdate = {Sat Nov 17 18:57:38 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigcse1990.bib},
  abstract = {The parallel programming community has long recognized
    the need for a simple programming environment offering
    interprocess communication between heterogeneous
    systems. As the Parallel Virtual Machine environment,
    PVM, has emerged to meet this goal, an increasing
    number of educational institutions are choosing PVM to
    support their teaching of parallel and distributed
    computing using networks of workstations. However, it
    is often the nature of PVM's design and implementation
    that can severely limit its success in a teaching
    environment. This paper first motivates and then
    describes improvements to the PVM environment which
    increase both robustness and efficiency in an
    educational setting.},
  acknowledgement = ack-nhfb,
  fjournal = {SIGCSE Bulletin (ACM Special Interest Group on
    Computer Science Education)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J688},
}
@Article{Mintchev:1997:TPM,
  author = {S. Mintchev and V. Getov},
  title = {Towards Portable Message Passing in {Java}: Binding
    {MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {135--142},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Mysliwiec:1997:CAM,
  author = {G. Mysliwiec and J. Sipowicz and R. Schaefer},
  title = {Control Activities in Message Passing Environment},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {143--150},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Mysliwiec:1997:IPS,
  author = {G. Mysliwiec and J. Sipowicz and H. Burkhart},
  title = {Implementing Parallel {SBS}-Type Linear Solvers Using
    {ALWAN}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {359--366},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Overeinder:1997:BCD,
  author = {B. J. Overeinder and P. M. A. Sloot},
  title = {Breaking the Curse of Dynamics by Task Migration:
    Pilot Experiments in the {Polder Metacomputer}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {194--207},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Book{Pacheco:1997:PPM,
  author = {Peter S. Pacheco},
  title = {Parallel programming with {MPI}},
  publisher = pub-MORGAN-KAUFMANN,
  address = pub-MORGAN-KAUFMANN:adr,
  pages = {xxii + 418},
  year = {1997},
  ISBN = {1-55860-339-5},
  ISBN-13 = {978-1-55860-339-4},
  LCCN = {QA76.642 .P3 1997},
  bibdate = {Fri Feb 04 17:32:19 2000},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Article{Peinado:1997:HPC,
  author = {M. Peinado and R. Venkatesan},
  title = {Highly Parallel Cryptographic Attacks},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {367--374},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Pernice:1997:BRM,
  author = {Michael Pernice},
  title = {Book Review: {{\em MPI: The Complete Reference}}},
  journal = j-IEEE-CONCURR,
  volume = {5},
  number = {1},
  pages = {80--81},
  month = jan # {\slash } # mar,
  year = {1997},
  CODEN = {IECMFX},
  DOI = {https://doi.org/10.1109/MCC.1997.580453},
  ISSN = {1092-3063 (print), 1558-0849 (electronic)},
  ISSN-L = {1092-3063},
  bibdate = {Tue Jan 16 06:49:26 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://dlib.computer.org/pd/books/pd1997/pdf/p1080.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE Concurrency},
}
@Article{Petcu:1997:ISM,
  author = {D. Petcu},
  title = {Implementation of Some Multiprocessor Algorithms for
    {ODEs} Using {PVM}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {375--382},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Piernas:1997:APM,
  author = {J. Piernas and A. Flores and J. M. Garcia},
  title = {Analyzing the Performance of {MPI} in a Cluster of
    Workstations Based on {Fast Ethernet}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {17--24},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Reinhard:1997:MHP,
  author = {E. Reinhard and A. Chalmers},
  title = {Message Handling in Parallel Radiance},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {486--493},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Resch:1997:CMP,
  author = {M. Resch and H. Berger and T. B{\"o}nisch},
  title = {A Comparison of {MPI} Performance on Different
    {MPPs}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {25--32},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@TechReport{Resch:1997:PM,
  author = {Michael Resch and Thomas Beisel and Holger Berger},
  title = {{PACX-MPI}},
  type = {{BI: Informationen f{\"u}r Nutzer des
    Rechenzentrums}},
  number = {1997,11/12},
  institution = {Universit{\"a}t Stuttgart, Zentrale
    Universit{\"a}tseinrichtung},
  address = {Stuttgart, Germany},
  year = {1997},
  bibdate = {Wed Aug 27 07:18:18 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@TechReport{Resch:1997:PMC,
  author = {Michael Resch and Holger Berger and Thomas
    B{\"o}nisch},
  title = {Performance of {MPI} on a {Cray T3E-512}},
  type = {{BI: Informationen f{\"u}r Nutzer des
    Rechenzentrums}},
  number = {1997,5/6},
  institution = {Universit{\"a}t Stuttgart, Zentrale
    Universit{\"a}tseinrichtung},
  address = {Stuttgart, Germany},
  pages = {??},
  year = {1997},
  bibdate = {Wed Aug 27 07:14:37 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {Third European CRAY-SGI MPP Workshop.},
  acknowledgement = ack-nhfb,
}
@Article{Roda:1997:PPI,
  author = {J. L. Roda and C. Rodriguez and F. Almeida and D.
    Gonzalez-Morales},
  title = {Predicting the Performance of Injection Communication
    Patterns on {PVM}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {33--40},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Rough:1997:PRD,
  author = {J. Rough and A. Goscinski and D. {De Paoli}},
  title = {{PVM} on the {RHODOS} Distributed Operating System},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {208--218},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Roy:1997:PNT,
  author = {R. Roy and Z. Stankovski},
  title = {Parallelization of Neutron Transport Solvers},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {494--501},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Santos:1997:ECP,
  author = {L. P. Santos and V. Castro and A. Proenca},
  title = {Evaluation of the Communication Performance on a
    Parallel Processing System},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {41--48},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Saphir:1997:SMI,
  author = {William Saphir},
  title = {A Survey of {MPI} Implementations},
  journal = {NHSE Review},
  volume = {2},
  number = {1},
  pages = {??--??},
  month = nov,
  year = {1997},
  bibdate = {Wed Jan 14 05:59:12 2004},
  bibsource = {http://www.crpc.rice.edu/NHSEreview/96-1.html;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  keywords = {National HPCC Software Exchange (NHSE); Rice
    University},
}
@Article{Serot:1997:EPF,
  author = {J. Serot},
  title = {Embodying Parallel Functional Skeletons: An
    Experimental Implementation on Top of {MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1300},
  pages = {629--??},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Apr 28 08:51:33 MDT 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Silva:1997:IPD,
  author = {Luis M. Silva and Joao Gabriel Silva and Simon
    Chapple},
  title = {Implementation and Performance of {DSMPI}},
  journal = j-SCI-PROG,
  volume = {6},
  number = {2},
  pages = {201--214},
  month = {Summer},
  year = {1997},
  CODEN = {SCIPEV},
  ISSN = {1058-9244 (print), 1875-919X (electronic)},
  ISSN-L = {1058-9244},
  bibdate = {Thu Mar 28 12:27:27 MST 2002},
  bibsource = {Compendex database;
    ftp://ftp.ira.uka.de/bibliography/Parallel/dsm.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Scientific Programming},
  journal-URL = {http://iospress.metapress.com/content/1058-9244},
  xxauthor = {L. M. Silva and S. Chapple and J. G. Silva},
  xxpages = {210--214},
}
@Article{Soch:1997:PGP,
  author = {M. Soch and P. Tvrdik and M. Volf},
  title = {Parallel Graph-Partitioning Using the Mob Heuristic},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {383--389},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Song:1997:ALL,
  author = {Jianjian Song and Heng Kek Choo and Kuok Ming Lee},
  title = {Application-level load migration and its
    implementation on top of {PVM}},
  journal = j-CPE,
  volume = {9},
  number = {1},
  pages = {1--19},
  month = jan,
  year = {1997},
  CODEN = {CPEXEI},
  ISSN = {1040-3108},
  ISSN-L = {1040-3108},
  bibdate = {Wed Apr 16 06:39:19 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  classification = {C6150J (Operating systems); C6150N (Distributed
    systems software)},
  corpsource = {Nat. Supercomput. Res. Center, Nat. Univ. of
    Singapore, Singapore},
  fjournal = {Concurrency, practice and experience},
  keywords = {concurrency; load migration; location transparency;
    network operating systems; operating systems
    (computers); parallel processing; process migration;
    PVM; receive buffer; residual dependency; resource
    allocation; virtual machines},
  pubcountry = {UK},
  treatment = {P Practical},
}
@Article{Souza:1997:EPH,
  author = {P. S. Souza and L. J. Senger and M. J. Santana and R.
    C. Santana},
  title = {Evaluating Personal High Performance Computing with
    {PVM} on {Windows} and {LINUX} Environments},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {49--56},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Squyres:1997:DEM,
  author = {J. M. Squyres and B. Saphir and A. Lumsdaine},
  title = {The Design and Evolution of the {MPI-2 C++}
    Interface},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1343},
  pages = {57--??},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Apr 28 08:51:33 MDT 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Stellner:1997:LBB,
  author = {G. Stellner and J. Trinitis},
  title = {Load Balancing Based on Process Migration for {MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1300},
  pages = {150--??},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Apr 28 08:51:33 MDT 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Strietzel:1997:PTS,
  author = {M. Strietzel},
  title = {Parallel Turbulence Simulation: Resolving the Inertial
    Subrange of {Kolmogorov}'s Spectra},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {508--516},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Sunderam:1997:TAS,
  author = {V. Sunderam and B. Topol and S. Moyer and A. Krantz},
  title = {Tools and Auxiliary Subsystems in {PVM}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {285--294},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Theodoropoulos:1997:GSP,
  author = {P. Theodoropoulos and P. Tsanakas and G.
    Papakonstantinou},
  title = {Global Semaphores in a Parallel Programming
    Environment},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {151--158},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Uminski:1997:EEP,
  author = {P. W. Uminski and M. R. Matuszek and H. Krawczyk},
  title = {Experimental Evaluation of {PVM} Group Communication},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {57--66},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Book{vandeGeijn:1997:UPP,
  author = {Robert A. {van de Geijn}},
  title = {Using {PLAPACK}: Parallel Linear Algebra Package},
  publisher = pub-MIT,
  address = pub-MIT:adr,
  pages = {xvii + 194},
  year = {1997},
  ISBN = {0-262-72026-4},
  ISBN-13 = {978-0-262-72026-7},
  LCCN = {QA185.D37 V36 1997},
  bibdate = {Fri Dec 19 10:39:21 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {With contributions by Philip Alpatov and others.},
  price = {US\$27.50},
  acknowledgement = ack-nhfb,
}
@Article{Vlassov:1997:SSM,
  author = {V. Vlassov and L.-E. Thorelli},
  title = {A Synchronizing Shared Memory: Model and Programming
    Implementation},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {159--166},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@InProceedings{Wang:1997:TPD,
  author = {Paul S. Wang},
  title = {Tools for parallel\slash distributed mathematical
    computation},
  crossref = {ACM:1997:PPS},
  pages = {188--195},
  year = {1997},
  bibdate = {Tue Sep 28 07:51:05 MDT 1999},
  bibsource = {Compendex database;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  affiliation = {Kent State Univ},
  affiliationaddress = {USA},
  classification = {722.3; 722.4; 723; 723.5; 921; 921.1},
  keywords = {Algebra; Common Lisp; Computational methods; Computer
    networks; Computer software; Data communication
    systems; Interfaces (computer); Lisp (programming
    language); Multi protocol (MP); Multiple instruction
    multiple data (MIMD) parallel machines; Network
    protocols; Parallel processing systems; Parallel
    virtual machines (PVM); Program compilers; Symbolic and
    algebraic computation (SAC); Virtual reality},
}
@Article{Winstanley:1997:PDP,
  author = {N. Winstanley and J. O'Donnell},
  title = {Parallel Distributed Programming with {Haskell+PVM}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1300},
  pages = {670--??},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Apr 28 08:51:33 MDT 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Wismueller:1997:DMP,
  author = {R. Wismueller},
  title = {Debugging Message Passing Programs Using Invisible
    Message Tags},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {295--304},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Wolf:1997:CMP,
  author = {K. Wolf and E. Brakkee and D. P. Ho},
  title = {Communication in Multi-Physics Applications},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {167--176},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Yalamov:1997:BRT,
  author = {Plamen Y. Yalamov and Svetozar Margenov},
  title = {Book Reviews: Two books on {MPI}: {{\em Parallel
    Programming with MPI}}; {{\em MPI: The Complete
    Reference (2nd printing)}}},
  journal = j-IEEE-CONCURR,
  volume = {5},
  number = {4},
  pages = {81--81},
  month = oct # {\slash } # dec,
  year = {1997},
  CODEN = {IECMFX},
  DOI = {https://doi.org/10.1109/MCC.1997.580454},
  ISSN = {1092-3063 (print), 1558-0849 (electronic)},
  ISSN-L = {1092-3063},
  bibdate = {Mon Jun 7 07:52:29 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://dlib.computer.org/pd/books/pd1997/pdf/p4080.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE Concurrency},
}
@Article{Zhang:1997:DED,
  author = {Xiaodong Zhang and Sandra G. Dykes and Hong Deng},
  title = {Distributed Edge Detection: Issues and
    Implementations},
  journal = j-IEEE-COMPUT-SCI-ENG,
  volume = {4},
  number = {1},
  pages = {72--82},
  month = jan # {\slash } # mar,
  year = {1997},
  CODEN = {ISCEE4},
  DOI = {https://doi.org/10.1109/99.590860},
  ISSN = {1070-9924 (print), 1558-190X (electronic)},
  ISSN-L = {1070-9924},
  bibdate = {Sat Jan 9 08:57:23 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://dlib.computer.org/cs/books/cs1997/pdf/c1072.pdf;
    http://www.computer.org/cse/cs1998/c1072abs.htm},
  abstract = {Experiments in parallelizing an edge detection
    algorithm on three representative message-passing
    architectures --- a low-cost, heterogeneous PVM
    network, an Intel {iPSC\slash 860} hypercube, and a
    {CM-5} massively parallel multicomputer --- provide
    insight into implementation and performance issues for
    image-processing applications.},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE Computational Science \& Engineering},
  journal-URL = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=99},
}
@Article{Zilli:1997:TBN,
  author = {G. Zilli and L. Bergamaschi},
  title = {Truncated Block {Newton} and Quasi-{Newton} Methods
    for Sparse Systems of Nonlinear Equations. Experiments
    on Parallel Platforms},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1332},
  pages = {390--400},
  year = {1997},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Thu Dec 9 06:27:54 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Book{Adamo:1998:MTO,
  author = {Jean-Marc Adamo},
  title = {Multi-threaded object-oriented {MPI}-based message
    passing interface: the {ARCH} library},
  volume = {SECS 446},
  publisher = pub-KLUWER,
  address = pub-KLUWER:adr,
  pages = {xiv + 185},
  year = {1998},
  ISBN = {0-7923-8165-3},
  ISBN-13 = {978-0-7923-8165-5},
  LCCN = {TK5102.5.A293 1998},
  bibdate = {Mon May 17 18:15:19 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  price = {US\$120.00},
  series = {The Kluwer international series in engineering and
    computer science},
  acknowledgement = ack-nhfb,
  keywords = {data transmission systems; object-oriented programming
    (computer science); threads (computer programs)},
  libnote = {Not yet in my library.},
}
@Article{Alexandrov:1998:CGP,
  author = {V. Alexandrov and F. Dehne and A. Rau-Chaplin and K.
    Taft},
  title = {Coarse Grained Parallel {Monte Carlo} Algorithms for
    Solving {SLAE} Using {PVM}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {323--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Andersson:1998:PFT,
  author =       "U. Andersson",
  title =        "Parallelization of a {$3$D FD-TD} Code for the
                 {Maxwell} Equations Using {MPI}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1541",
  pages =        "12--19",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Sep 15 10:01:31 MDT 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncs1998b.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
  keywords =     "applied parallel computing; computing science; PARA;
                 parallel computing",
}
@TechReport{Andre:1998:BVN,
  author =       "Andr{\'e} Fachat and Karl Heinz Hoffmann",
  title =        "Blocking vs. non-blocking communication under {MPI} on
                 a Master-Worker problem",
  type =         "{Preprint-Reihe des Chemnitzer SFB 393
                 Sonderforschungsbereich Numerische Simulation auf Massiv
                 Parallelen Rechnern}",
  number =       "98,18",
  institution =  "Universit{\"a}t Chemnitz-Zwickau",
  address =      "Chemnitz, Germany",
  year =         "1998",
  bibdate =      "Wed Aug 27 07:09:52 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Article{Anonymous:1998:ANO,
  author =       "Anonymous",
  title =        "Announcements: New Official {Fortran} Technical
                 Reports; Working Group 5 Documents; {OpenGL} {Fortran
                 95} Bindings; {MPI} Module Provides Enhanced {Fortran}
                 Support; Variable Precision Arithmetic; {Fortran}
                 Information Sites; New {Fortran} Compiler Versions from
                 {Lahey} and {Fujitsu}; Downloadable Advanced {Fortran}
                 Textbook; {Fortran} Engineering Textbook",
  journal =      j-FORTRAN-FORUM,
  volume =       "17",
  number =       "3",
  pages =        "1--2",
  month =        dec,
  year =         "1998",
  CODEN =        "????",
  ISSN =         "1061-7264 (print), 1931-1311 (electronic)",
  ISSN-L =       "1061-7264",
  bibdate =      "Thu Feb 07 13:34:54 2002",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Fortran Forum",
  issue =        "53",
}
@Article{Baker:1998:MNC,
  author =       "M. Baker",
  title =        "{MPI} on {NT}: The Current Status and Performance of
                 the Available Environments",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "63--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Baker:1998:MNP,
  author =       "M. Baker and G. Fox",
  title =        "{MPI} on {NT}: a Preliminary Evaluation of the
                 Available Environments",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1388",
  pages =        "549--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Oct 10 14:40:24 MDT 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Berthou:1998:PHM,
  author =       "J.-Y. Berthou and L. Plagne",
  title =        "Parallel {HPF-MPI} Implementation of the {TBSCM}
                 {Poisson} Solver",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1401",
  pages =        "252--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Oct 10 14:40:24 MDT 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@TechReport{Bhanot:1998:DTM,
  author =       "Gyan Bhanot",
  title =        "A $2$-d transpose {MPI} code",
  type =         "Research report",
  number =       "RC 21217",
  institution =  inst-IBM-WATSON,
  address =      inst-IBM-WATSON:adr,
  year =         "1998",
  bibdate =      "Wed Aug 27 07:16:38 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Article{Browne:1998:RPA,
  author =       "Shirley Browne and Jack Dongarra and Kevin London",
  title =        "Review of Performance Analysis Tools for {MPI}
                 Parallel Programs",
  journal =      "NHSE Review",
  volume =       "3",
  year =         "1998",
  CODEN =        "????",
  ISSN =         "????",
  bibdate =      "Tue Feb 26 10:10:44 2002",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "Accepted, to appear.",
  URL =          "http://www.cs.utk.edu/~browne/perftools-review/",
  acknowledgement = ack-nhfb,
  keywords =     "National HPCC Software Exchange (NHSE); Rice
                 University",
  remark =       "This journal ceased publication in 1997.",
}
@Article{Bubak:1998:PCL,
  author =       "M. Bubak and P. Luszczek and A. Wierzbowska",
  title =        "Porting {CHAOS} Library to {MPI}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "131--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Carissimi:1998:AEM,
  author =       "A. Carissimi and M. Pasin",
  title =        "{Athapascan}: An Experience on Mixing {MPI}
                 Communications and Threads",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "137--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Ceron:1998:PID,
  author =       "C. Ceron and J. Dopazo and E. L. Zapata and J. M.
                 Carazo and O. Trelles",
  title =        "Parallel implementation of {DNAml} program on
                 message-passing architectures",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "24",
  number =       "5--6",
  pages =        "701--716",
  day =          "1",
  month =        jun,
  year =         "1998",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sun Oct 25 09:30:12 MST 1998",
  bibsource =    "Compendex database;
                 http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1998&volume=24&issue=5-6;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.elsevier.com/cas/tree/store/parco/sub/1998/24/5-6/1279.pdf",
  acknowledgement = ack-nhfb,
  affiliation =  "Univ of Malaga",
  affiliationaddress = "Malaga, Spain",
  classification = "722; 722.4; 723; 723.2",
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
  journalabr =   "Parallel Comput",
  keywords =     "Computer architecture; Computer software; Message
                 passing computer architecture; Natural sciences
                 computing; Parallel algorithms; Parallel processing
                 systems; Parallel virtual machines (PVM)",
}
@Article{Chan:1998:PCT,
  author =       "K. J. Chan and A. M. Gibbons and M. Pias and W.
                 Rytter",
  title =        "On the {PVM} Computations of Transitive Closure and
                 Algebraic Path Problems",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "338--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Chapman:1998:OHI,
  author =       "B. Chapman and P. Mehrotra",
  title =        "{OpenMP} and {HPF}: Integrating Two Paradigms",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1470",
  pages =        "650--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Oct 10 14:40:24 MDT 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 http://www.math.utah.edu/pub/tex/bib/hpfortran.bib;
                 http://www.math.utah.edu/pub/tex/bib/lncs1998b.bib;
                 http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Chetlur:1998:ALE,
  author =       "M. Chetlur and G. D. Sharma and N. Abu-Ghazaleh and U.
                 K. V. Rajasekaran",
  title =        "An Active Layer Extension to {MPI}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "97--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Clark:1998:FOP,
  author =       "David Clark",
  title =        "Focus: {OpenMP}: a parallel standard for the masses",
  journal =      j-IEEE-CONCURR,
  volume =       "6",
  number =       "1",
  pages =        "10--12",
  month =        jan # "\slash " # mar,
  year =         "1998",
  CODEN =        "IECMFX",
  DOI =          "https://doi.org/10.1109/4434.656771",
  ISSN =         "1092-3063 (print), 1558-0849 (electronic)",
  ISSN-L =       "1092-3063",
  bibdate =      "Tue Jan 16 06:04:49 MST 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeeconcurrency.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://dlib.computer.org/pd/books/pd1998/pdf/p1010.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Concurrency",
}
@Article{Cotronis:1998:DMP,
  author =       "Y. Cotronis",
  title =        "Developing Message-Passing Applications on {MPICH}
                 under Ensemble",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "145--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Cunha:1998:MPP,
  author =       "J. C. Cunha and V. Duarte",
  title =        "Monitoring {PVM} Programs Using the {DAMS} Approach",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "273--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Dagum:1998:OIS,
  author =       "Leonardo Dagum and Ramesh Menon",
  title =        "{OpenMP}: An Industry-Standard {API} for Shared-Memory
                 Programming",
  journal =      j-IEEE-COMPUT-SCI-ENG,
  volume =       "5",
  number =       "1",
  pages =        "46--55",
  month =        jan # "\slash " # mar,
  year =         "1998",
  CODEN =        "ISCEE4",
  DOI =          "https://doi.org/10.1109/99.660313",
  ISSN =         "1070-9924 (print), 1558-190X (electronic)",
  ISSN-L =       "1070-9924",
  bibdate =      "Sat Jan 9 08:57:23 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeecomputscieng.bib;
                 http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://dlib.computer.org/cs/books/cs1998/pdf/c1046.pdf;
                 http://www.computer.org/cse/cs1998/c1046abs.htm",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Computational Science \& Engineering",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=99",
}
@Article{Dantas:1998:ESM,
  author =       "M. A. R. Dantas and E. J. Zaluska",
  title =        "Efficient scheduling of {MPI} applications on networks
                 of workstations",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "13",
  number =       "6",
  pages =        "489--499",
  day =          "20",
  month =        may,
  year =         "1998",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Wed Feb 27 12:41:17 MST 2002",
  bibsource =    "http://www.elsevier.com/locate/issn/0167739X;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.elsevier.com/gej-ng/10/19/19/28/20/21/abstract.html",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Delves:1998:HPF,
  author =       "M. Delves and H. Zima",
  title =        "{High Performance Fortran}: a Status Report or: Are We
                 Ready to Give Up {MPI}?",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "161--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Dimov:1998:IMC,
  author =       "I. Dimov and V. Alexandrov and A. Karaivanova",
  title =        "Implementation of {Monte Carlo} Algorithms for
                 Eigenvalue Problem Using {MPI}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "346--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Espinosa:1998:ADP,
  author =       "A. Espinosa and T. Margalef and E. Luque",
  title =        "Automatic Detection of {PVM} Program Performance
                 Problems",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "19--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Fagg:1998:MMH,
  author =       "G. E. Fagg and K. S. London and J. J. Dongarra",
  title =        "{MPIConnect}: Managing Heterogeneous {MPI}
                 Applications Interoperation and Process Control",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "93--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Book{Fang:1998:DDL,
  author =       "Niandong Fang",
  title =        "Distributed data library and tools for an {MPI}
                 programming environment",
  volume =       "1",
  publisher =    "Shaker",
  address =      "Aachen, Germany",
  pages =        "xx + 195",
  year =         "1998",
  ISBN =         "3-8265-4101-4",
  ISBN-13 =      "978-3-8265-4101-8",
  LCCN =         "????",
  bibdate =      "Wed Aug 27 06:49:31 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "Also published as dissertation of the University of
                 Basel.",
  series =       "Research reports in computer science",
  acknowledgement = ack-nhfb,
}
@InProceedings{Ferrari:1998:JNPa,
  author =       "Adam J. Ferrari",
  title =        "{JPVM}: Network Parallel Computing in {Java}",
  crossref =     "ACM:1998:AWJ",
  pages =        "??--??",
  year =         "1998",
  bibdate =      "Thu Apr 27 10:43:08 2000",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.cs.ucsb.edu/conferences/java98/papers/jpvm.pdf;
                 http://www.cs.ucsb.edu/conferences/java98/papers/jpvm.ps",
  acknowledgement = ack-nhfb,
}
@Article{Ferrari:1998:JNPb,
  author =       "Adam Ferrari",
  title =        "{JPVM}: network parallel computing in {Java}",
  journal =      j-CPE,
  volume =       "10",
  number =       "11--13",
  pages =        "985--992",
  month =        sep,
  year =         "1998",
  CODEN =        "CPEXEI",
  ISSN =         "1040-3108",
  ISSN-L =       "1040-3108",
  bibdate =      "Tue Sep 7 06:06:44 MDT 1999",
  bibsource =    "http://www.interscience.wiley.com/jpages/1040-3108/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www3.interscience.wiley.com/journalfinder.html",
  note =         "Special Issue: Java for High-performance Network
                 Computing.",
  URL =          "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=10050413;
                 http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=10050413&PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency, practice and experience",
}
@Article{Ferrari:1998:MDC,
  author =       "Adam Ferrari and V. S. Sunderam",
  title =        "Multiparadigm distributed computing with {TPVM}",
  journal =      j-CPE,
  volume =       "10",
  number =       "3",
  pages =        "199--228",
  month =        mar,
  year =         "1998",
  CODEN =        "CPEXEI",
  ISSN =         "1040-3108",
  ISSN-L =       "1040-3108",
  bibdate =      "Tue Sep 7 06:06:39 MDT 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www3.interscience.wiley.com/journalfinder.html",
  URL =          "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=5374;
                 http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=5374&PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency, practice and experience",
}
@Article{Folino:1998:EMC,
  author =       "G. Folino and G. Spezzano and D. Talia",
  title =        "Evaluating and Modeling Communication Overhead of
                 {MPI} Primitives on the {Meiko CS-2}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "27--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Folino:1998:PEM,
  author =       "G. Folino and G. Spezzano and D. Talia",
  title =        "Performance Evaluation and Modelling of {MPI}
                 Communications on the {Meiko CS-2}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1401",
  pages =        "932--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Oct 10 14:40:24 MDT 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@InProceedings{Foster:1998:GEM,
  author =       "Ian Foster",
  title =        "A Grid-Enabled {MPI}: Message Passing in Heterogeneous
                 Distributed Computing Systems",
  crossref =     "ACM:1998:SHP",
  pages =        "??--??",
  year =         "1998",
  bibdate =      "Wed Oct 07 08:50:26 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.supercomp.org/sc98/papers/",
  acknowledgement = ack-nhfb,
}
@Article{Foster:1998:WAI,
  author =       "Ian Foster and Jonathan Geisler and William Gropp and
                 Nicholas Karonis and Ewing Lusk and George
                 Thiruvathukal and Steven Tuecke",
  title =        "Wide-area implementation of the {Message Passing
                 Interface}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "24",
  number =       "12--13",
  pages =        "1735--1749",
  day =          "1",
  month =        nov,
  year =         "1998",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Fri Aug 6 10:15:40 MDT 1999",
  bibsource =    "http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1998&volume=24&issue=12-13;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.elsevier.com/cas/tree/store/parco/sub/1998/24/12-13/1352.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Fuerle:1998:IPC,
  author =       "T. Fuerle and E. Schikuta and C. Loeffelhardt and K.
                 Stockinger",
  title =        "On the Implementation of a Portable, Client-Server
                 Based {MPI-IO} Interface",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "172--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Garcia-Consuegra:1998:DGR,
  author =       "J. D. Garcia-Consuegra and J. A. Gallud and G.
                 Sebastian",
  title =        "Distributed Georeferring of Remotely Sensed
                 {Landsat-TM} Imagery Using {MPI}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1541",
  pages =        "161--166",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Sep 15 10:01:31 MDT 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncs1998b.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
  keywords =     "applied parallel computing; computing science; PARA;
                 parallel computing",
}
@Article{Geist:1998:HNG,
  author =       "G. A. Geist",
  title =        "{Harness}: The Next Generation Beyond {PVM}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "74--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Gorlatch:1998:GMI,
  author =       "Sergei Gorlatch and Holger Bischof",
  title =        "A Generic {MPI} Implementation for a Data-Parallel
                 Skeleton: Formal Derivation and Application to {FFT}",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "8",
  number =       "4",
  pages =        "447--??",
  month =        dec,
  year =         "1998",
  CODEN =        "PPLTEE",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L =       "0129-6264",
  bibdate =      "Thu Jan 6 12:02:34 MST 2005",
  bibsource =    "http://ejournals.wspc.com.sg/ppl/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
}
@Article{Goujon:1998:AAT,
  author =       "D. S. Goujon and M. Michel and J. Peeters and J. E.
                 Devaney",
  title =        "{AutoMap} and {AutoLink}: Tools for Communicating
                 Complex and Dynamic Data-Structures Using {MPI}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1362",
  pages =        "98--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Apr 28 08:51:33 MDT 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@TechReport{Grabowsky:1998:NMP,
  author =       "Lothar Grabowsky and Thomas Ermer and J{\"o}rg
                 Werner",
  title =        "{Nutzung von MPI f{\"u}r parallele FEM-Systeme}.
                 ({German}) [{Use} of {MPI} for parallel {FEM}
                 systems]",
  type =         "{Preprint-Reihe des Chemnitzer SFB 393
                 Sonderforschungsbereich Numerische Simulation auf Massiv
                 Parallelen Rechnern}",
  number =       "97,08; RA-TR 02-97",
  institution =  "Universit{\"a}t Chemnitz-Zwickau",
  address =      "Chemnitz, Germany",
  year =         "1998",
  bibdate =      "Wed Aug 27 07:11:28 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  language =     "German",
}
@Book{Gropp:1998:MCR,
  author =       "William Gropp and Steven Huss-Lederman and Andrew
                 Lumsdaine and Ewing Lusk and Bill Nitzberg and William
                 Saphir and Marc Snir",
  title =        "{MPI}: The Complete Reference. Volume 2, The {MPI-2}
                 Extensions",
  publisher =    pub-MIT,
  address =      pub-MIT:adr,
  edition =      "Second",
  pages =        "350",
  year =         "1998",
  ISBN =         "0-262-57123-4 (vol. 2), 0-262-69216-3 (set)",
  ISBN-13 =      "978-0-262-57123-4 (vol. 2), 978-0-262-69216-8 (set)",
  LCCN =         "QA76.642 .M65 1998",
  bibdate =      "Thu Oct 29 07:27:43 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "See also volume 1 \cite{Snir:1998:MCR}.",
  price =        "US\$30 (paperback)",
  series =       "Scientific and Engineering Computation",
  URL =          "http://mitpress.mit.edu/book-home.tcl?isbn=0262571234",
  acknowledgement = ack-nhfb,
}
@Article{Haimes:1998:UPM,
  author =       "R. Haimes and K. E. Jordan",
  title =        "Using {PVM} and {MPI} for Co-processed, Distributed
                 and Parallel Scientific Visualization",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1388",
  pages =        "1098--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Oct 10 14:40:24 MDT 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Hansen:1998:EMP,
  author =       "Per {Brinch Hansen}",
  title =        "An Evaluation of the {Message-Passing Interface}",
  journal =      j-SIGPLAN,
  volume =       "33",
  number =       "3",
  pages =        "65--72",
  month =        mar,
  year =         "1998",
  CODEN =        "SINODQ",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Thu Apr 30 08:30:23 MDT 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "The author criticizes MPI, and remarks ``MPI \ldots{}
                 lack[s] the elegance and security that can only by
                 checked by a parallel programming language.''",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIG{\-}PLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Hatazaki:1998:RRS,
  author =       "T. Hatazaki",
  title =        "Rank Reordering Strategy for {MPI} Topology Creation
                 Functions",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "188--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Herland:1998:CML,
  author =       "B. G. Herland and M. Eberl and H. Hellwagner",
  title =        "A Common Messaging Layer for {MPI} and {PVM} over
                 {SCI}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1401",
  pages =        "576--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Oct 10 14:40:24 MDT 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@InProceedings{Husbands:1998:MSD,
  author =       "Parry J. Husbands",
  title =        "{MPI-StarT}: Delivering Network Performance to
                 Numerical Applications",
  crossref =     "ACM:1998:SHP",
  pages =        "??--??",
  year =         "1998",
  bibdate =      "Wed Oct 07 08:50:26 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.supercomp.org/sc98/papers/",
  acknowledgement = ack-nhfb,
}
@Article{Karlsson:1998:CCC,
  author =       "S. Karlsson and M. Brorsson",
  title =        "A Comparative Characterization of Communication
                 Patterns in Applications Using {MPI} and Shared Memory
                 on an {IBM SP2}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1362",
  pages =        "189--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Apr 28 08:51:33 MDT 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Kemelmakher:1998:SAR,
  author =       "M. Kemelmakher and O. Kremien",
  title =        "Scalable and Adaptive Resource Sharing in {PVM}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "196--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Kranzlmueller:1998:DPP,
  author =       "D. Kranzlmueller and J. Volkert",
  title =        "Debugging Point-to-Point Communication in {MPI} and
                 {PVM}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "265--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Kuhn:1998:FFW,
  author =       "Bob Kuhn",
  title =        "{Fortran Futures}: Workshop: {OpenMP} for Parallel
                 {Fortran} Applications",
  journal =      j-FORTRAN-FORUM,
  volume =       "17",
  number =       "3",
  pages =        "22--22",
  month =        dec,
  year =         "1998",
  CODEN =        "????",
  ISSN =         "1061-7264 (print), 1931-1311 (electronic)",
  ISSN-L =       "1061-7264",
  bibdate =      "Thu Feb 07 06:54:12 2002",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fortran-forum.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Fortran Forum",
  issue =        "53",
}
@Article{Lavi:1998:IPD,
  author =       "R. Lavi and A. Barak",
  title =        "Improving the {PVM} Daemon Network Performance by
                 Direct Network Access",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1497",
  pages =        "44--??",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Jan 5 08:21:58 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Leung:1998:PAN,
  author =       "Ka-Cheong Leung and Mounir Hamdi",
  title =        "Performance assessment of network protocols and
                 parallel programming tools for distributed computing
                 systems",
  journal =      j-INT-J-COMPUT-SYST-SCI-ENG,
  volume =       "13",
  number =       "1",
  pages =        "67--80",
  month =        jan,
  year =         "1998",
  CODEN =        "CSSEEI",
  ISSN =         "0267-6192",
  ISSN-L =       "0267-6192",
  bibdate =      "Thu Feb 4 13:21:32 MST 1999",
  bibsource =    "Compendex database;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib; OCLC
                 Contents1st database",
  acknowledgement = ack-nhfb,
  affiliation =  "Hong Kong Univ of Science and Technology",
  affiliationaddress = "Kowloon, Hong Kong",
  classification = "722.2; 722.3; 722.4; 723.1; 723.2; 723.5",
  fjournal =     "International Journal of Computer Systems Science and
                 Engineering",
  journalabr =   "Comput Syst Sci Eng",
  keywords =     "Communication overhead; Computer aided software
                 engineering; Computer programming; Computer
                 workstations; Data communication systems; Distributed
                 computer systems; Ethernet; Fiber distributed data
                 interface; Interfaces (computer); Local area networks;
                 Mathematical models; Network protocols; Parallel
                 processing systems; Software Package Express; Software
                 Package PVM",
}
@Article{Lockey:1998:CRM,
  author =       "P. Lockey and R. Proctor and I. D. James",
  title =        "Characterization of {I/O} Requirements in a Massively
                 Parallel Shelf Sea Model",
  journal =      j-IJHPCA,
  volume =       "12",
  number =       "3",
  pages =        "320--332",
  month =        "Fall",
  year =         "1998",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/109434209801200302",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Tue Nov 6 09:20:17 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://journals.sagepub.com/doi/pdf/10.1177/109434209801200302",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
  xxmonth =      sep,
}
@InProceedings{Lu:1998:ONW,
  author =       "Honghui Lu and Y. Charlie Hu and Willy Zwaenepoel",
  title =        "{OpenMP} on Networks of Workstations",
  crossref =     "ACM:1998:SHP",
  pages =        "??--??",
  year =         "1998",
  bibdate =      "Wed Mar 06 06:32:51 2002",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/supercomputing98.bib;
                 http://www.supercomp.org/sc98/papers/",
  URL =          "http://www.supercomp.org/sc98/TechPapers/sc98_FullAbstracts/Lu1105/index.htm",
  acknowledgement = ack-nhfb,
}
@Article{Mackay:1998:SPF,
  author =       "David Mackay and G. Mahinthakumar and Ed D'Azevedo",
  title =        "A Study of {I/O} in a Parallel Finite Element
                 Groundwater Transport Code",
  journal =      j-IJHPCA,
  volume =       "12",
  number =       "3",
  pages =        "307--319",
  month =        "Fall",
  year =         "1998",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/109434209801200301",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Tue Nov 6 09:20:17 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://journals.sagepub.com/doi/pdf/10.1177/109434209801200301",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
  xxmonth =      sep,
}
@Article{Mamontov:1998:AES,
  author =       "Y. V. Mamontov and M. Willander",
  title =        "An Algorithm to Evaluate Spectral Densities of
                 High-Dimensional Stationary Diffusion Stochastic
                 Processes with Non-linear Coefficients: The General
                 Scheme and Issues on Implementation with {PVM}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1541",
  pages =        "315--321",
  year =         "1998",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Sep 15 10:01:31 MDT 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncs1998b.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
  keywords =     "applied parallel computing; computing science; PARA;
                 parallel computing",
}
@Article{Mans:1998:PDP,
  author = {Bernard Mans},
  title = {Portable distributed priority queues with {MPI}},
  journal = j-CPE,
  volume = {10},
  number = {3},
  pages = {175--198},
  month = mar,
  year = {1998},
  CODEN = {CPEXEI},
  ISSN = {1040-3108},
  ISSN-L = {1040-3108},
  bibdate = {Tue Sep 7 06:06:39 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www3.interscience.wiley.com/journalfinder.html},
  URL = {http://www3.interscience.wiley.com/cgi-bin/abstract?ID=5373;
    http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=5373&PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency, practice and experience},
}
@Article{Marinho:1998:WMP,
  author = {J. Marinho and J. G. Silva},
  title = {{WMPI} --- Message Passing Interface for {Win32}
    Clusters},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {113--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Martins:1998:JIW,
  author = {P. Martins and L. M. Silva and J. Silva},
  title = {A {Java} Interface for {WMPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {121--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Medeiros:1998:IPM,
  author = {P. D. Medeiros and J. C. Cunha},
  title = {Interconnecting {PVM} and {MPI} Applications},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {105--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Megson:1998:CRH,
  author = {G. M. Megson and R. S. Fish and D. N. J. Clarke},
  title = {Creation of Reconfigurable Hardware Objects in {PVM}
    Environments},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {215--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{MF:1998:SIM,
  author = {{MPI Forum}},
  title = {Special Issue: {MPI2}: a Message-Passing Interface
    Standard},
  journal = j-IJHPCA,
  volume = {12},
  number = {1--2},
  pages = {1--299},
  month = {Spring--Summer},
  year = {1998},
  CODEN = {IHPCFL},
  ISSN = {1094-3420 (print), 1741-2846 (electronic)},
  ISSN-L = {1094-3420},
  bibdate = {Wed Apr 8 15:55:29 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {International Journal of High Performance Computing
    Applications},
  journal-URL = {https://journals.sagepub.com/home/hpc},
}
@Article{Morimoto:1998:IMM,
  author = {K. Morimoto and T. Matsumoto and K. Hiraki},
  title = {Implementing {MPI} with the Memory-Based Communication
    Facilities on the {SSS-CORE} Operating System},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {223--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{MPIForum:1998:SIM,
  author = {{MPI Forum}},
  title = {Special Issue: {MPI2}: a Message-Passing Interface
    Standard},
  journal = j-IJSAHPC,
  volume = {12},
  number = {1--2},
  pages = {1--299},
  month = {Spring--Summer},
  year = {1998},
  CODEN = {IJSCFG},
  ISSN = {1078-3482},
  bibdate = {Wed Apr 8 15:55:29 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {International Journal of Supercomputer Applications
    and High Performance Computing},
}
@Article{Neophytou:1998:NDJ,
  author = {N. Neophytou and P. Evripidou},
  title = {{Net-dbx}: a {Java} Powered Tool for Interactive
    Debugging of {MPI} Programs Across the {Internet}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1470},
  pages = {181--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Sat Oct 10 14:40:24 MDT 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Nieplocha:1998:CHP,
  author = {Jarek Nieplocha and Ian Foster and Rick A. Kendall},
  title = {{ChemIO}: High Performance Parallel {I/O} for
    Computational Chemistry Applications},
  journal = j-IJHPCA,
  volume = {12},
  number = {3},
  pages = {345--363},
  month = {Fall},
  year = {1998},
  CODEN = {IHPCFL},
  DOI = {https://doi.org/10.1177/109434209801200304},
  ISSN = {1094-3420 (print), 1741-2846 (electronic)},
  ISSN-L = {1094-3420},
  bibdate = {Tue Nov 6 09:20:17 MST 2018},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://journals.sagepub.com/doi/pdf/10.1177/109434209801200304},
  acknowledgement = ack-nhfb,
  fjournal = {International Journal of High Performance Computing
    Applications},
  journal-URL = {https://journals.sagepub.com/home/hpc},
  xxmonth = sep,
}
@Article{Nitsche:1998:FMP,
  author = {T. Nitsche and W. Webers},
  title = {Functional Message Passing with {OPAL-MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {281--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Oldfield:1998:EPS,
  author = {Ron A. Oldfield and David E. Womble and Curtis C.
    Ober},
  title = {Efficient Parallel {I/O} in Seismic Processing},
  journal = j-IJHPCA,
  volume = {12},
  number = {3},
  pages = {333--344},
  month = {Fall},
  year = {1998},
  CODEN = {IHPCFL},
  DOI = {https://doi.org/10.1177/109434209801200303},
  ISSN = {1094-3420 (print), 1741-2846 (electronic)},
  ISSN-L = {1094-3420},
  bibdate = {Tue Nov 6 09:20:17 MST 2018},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://journals.sagepub.com/doi/pdf/10.1177/109434209801200303},
  acknowledgement = ack-nhfb,
  fjournal = {International Journal of High Performance Computing
    Applications},
  journal-URL = {https://journals.sagepub.com/home/hpc},
  xxmonth = sep,
}
@Article{Orlando:1998:MBR,
  author = {S. Orlando and R. Perego},
  title = {An {MPI}-based Run-Time Support to Coordinate {HPF}
    Tasks},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {289--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Papadopoulos:1998:DVS,
  author = {P. M. Papadopoulos and J. A. Kohl},
  title = {Dynamic Visualization and Steering Using {PVM} and
    {MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {297--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Poggi:1998:UPD,
  author = {Agostino Poggi and Giulio Destri},
  title = {Using {PVM} to Develop a Distributed Object-Oriented
    Language for Heterogeneous Processing},
  journal = j-J-SYST-SOFTW,
  volume = {40},
  number = {2},
  pages = {139--150},
  month = feb,
  year = {1998},
  CODEN = {JSSODM},
  ISSN = {0164-1212 (print), 1873-1228 (electronic)},
  ISSN-L = {0164-1212},
  bibdate = {Thu Sep 9 07:30:16 MDT 2010},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/01641212},
  acknowledgement = ack-nhfb,
  fjournal = {The Journal of systems and software},
  journal-URL = {http://www.sciencedirect.com/science/journal/01641212},
}
@Article{Rabenseifner:1998:MGI,
  author = {R. Rabenseifner},
  title = {{MPI-GLUE}: Interoperable High-Performance {MPI}
    Combining Different Vendor's {MPI} Worlds},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1470},
  pages = {563--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Sat Oct 10 14:40:24 MDT 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Reussner:1998:SDA,
  author = {R. Reussner and P. Sanders and L. Prechelt and M.
    Mueller},
  title = {{SKaMPI}: a Detailed, Accurate {MPI} Benchmark},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {52--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Scott:1998:PWN,
  author = {S. L. Scott and M. Fischer and A. Geist},
  title = {{PVM} on {Windows} and {NT} Clusters},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {231--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Sevenich:1998:PPU,
  author = {Richard Sevenich},
  title = {Parallel Processing Using {PVM}},
  journal = j-LINUX-J,
  volume = {45},
  pages = {??--??},
  month = jan,
  year = {1998},
  CODEN = {LIJOFX},
  ISSN = {1075-3583 (print), 1938-3827 (electronic)},
  ISSN-L = {1075-3583},
  bibdate = {Fri Oct 9 08:35:26 MDT 1998},
  bibsource = {http://noframes.linuxjournal.com/lj-issues/issue45/index.html;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract = {Turn your networked computers into a virtual
    machine.},
  acknowledgement = ack-nhfb,
  fjournal = {Linux journal},
  journal-URL = {http://portal.acm.org/citation.cfm?id=J508},
}
@Article{Simitci:1998:CLP,
  author = {Huseyin Simitci and Daniel A. Reed},
  title = {A Comparison of Logical and Physical Parallel {I/O}
    Patterns},
  journal = j-IJHPCA,
  volume = {12},
  number = {3},
  pages = {364--380},
  month = {Fall},
  year = {1998},
  CODEN = {IHPCFL},
  DOI = {https://doi.org/10.1177/109434209801200305},
  ISSN = {1094-3420 (print), 1741-2846 (electronic)},
  ISSN-L = {1094-3420},
  bibdate = {Tue Nov 6 09:20:17 MST 2018},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://journals.sagepub.com/doi/pdf/10.1177/109434209801200305},
  acknowledgement = ack-nhfb,
  fjournal = {International Journal of High Performance Computing
    Applications},
  journal-URL = {https://journals.sagepub.com/home/hpc},
  xxmonth = sep,
}
@Book{Snir:1998:MCR,
  author = {Marc Snir and Steve W. Otto and Steven Huss-Lederman
    and David W. Walker and Jack Dongarra},
  title = {{MPI}: The Complete Reference. Volume 1, The {MPI-1}
    Core},
  publisher = pub-MIT,
  address = pub-MIT:adr,
  edition = {Second},
  pages = {450},
  month = sep,
  year = {1998},
  ISBN = {0-262-69215-5 (vol. 1), 0-262-69216-3 (set)},
  ISBN-13 = {978-0-262-69215-1 (vol. 1), 978-0-262-69216-8 (set)},
  LCCN = {QA76.642 .M65 1998},
  bibdate = {Thu Oct 29 07:27:43 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {See also volume 2 \cite{Gropp:1998:MCR}.},
  price = {US\$35 (paperback)},
  series = {Scientific and Engineering Computation},
  URL = {http://mitpress.mit.edu/book-home.tcl?isbn=0262692155},
  acknowledgement = ack-nhfb,
}
@MastersThesis{Stockinger:1998:VPC,
  author = {Kurt Stockinger},
  title = {{ViMPIOS} --- a portable, client-server based
    implementation of {MPI-IO} on {ViPIOS}},
  type = {{Diplom-Arbeit}},
  school = {Universit{\"a}t Wien},
  address = {Vienna, Austria},
  pages = {155},
  year = {1998},
  bibdate = {Wed Aug 27 07:21:00 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Thakur:1998:CUM,
  author = {Rajeev S. Thakur},
  title = {A Case for Using {MPI}'s Derived Datatypes to Improve
    {I/O} Performance},
  crossref = {ACM:1998:SHP},
  pages = {??--??},
  year = {1998},
  bibdate = {Wed Oct 07 08:50:26 1998},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.supercomp.org/sc98/papers/},
  acknowledgement = ack-nhfb,
}
@Article{Topol:1998:PTV,
  author = {Brad Topol and John T. Stasko and Vaidy Sunderam},
  title = {{PVaniM}: a tool for visualization in network
    computing environments},
  journal = j-CPE,
  volume = {10},
  number = {14},
  pages = {1197--1222},
  day = {10},
  month = dec,
  year = {1998},
  CODEN = {CPEXEI},
  ISSN = {1040-3108},
  ISSN-L = {1040-3108},
  bibdate = {Tue Sep 7 06:06:45 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www3.interscience.wiley.com/journalfinder.html},
  URL = {http://www3.interscience.wiley.com/cgi-bin/abstract?ID=40005932;
    http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=40005932&PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency, practice and experience},
}
@Article{Tourino:1998:PBL,
  author = {J. Touri{\~n}o and R. Doallo},
  title = {A {PVM}-Based Library for Sparse Matrix
    Factorizations},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {304--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Traeff:1998:PRL,
  author = {J. L. Traeff},
  title = {Portable Randomized List Ranking on Multiprocessors
    Using {MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {395--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Wismueller:1998:LMS,
  author = {R. Wismueller},
  title = {On-Line Monitoring Support in {PVM} and {MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1497},
  pages = {312--??},
  year = {1998},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Jan 5 08:21:58 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@InProceedings{Yalamanchilli:1998:CPJ,
  author = {Narendar Yalamanchilli and William Cohen},
  title = {Communication Performance of {Java} based {Parallel
    Virtual Machines}},
  crossref = {ACM:1998:AWJ},
  pages = {??--??},
  year = {1998},
  bibdate = {Thu Apr 27 10:43:08 2000},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.cs.ucsb.edu/conferences/java98/papers/passing.pdf;
    http://www.cs.ucsb.edu/conferences/java98/papers/passing.ps},
  acknowledgement = ack-nhfb,
}
@Article{Zhou:1998:LST,
  author = {Honbo Zhou and Al Geist},
  title = {{LPVM}: a step towards multithread {PVM}},
  journal = j-CPE,
  volume = {10},
  number = {5},
  pages = {407--416},
  day = {25},
  month = apr,
  year = {1998},
  CODEN = {CPEXEI},
  ISSN = {1040-3108},
  ISSN-L = {1040-3108},
  bibdate = {Tue Sep 7 06:06:40 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www3.interscience.wiley.com/journalfinder.html},
  URL = {http://www3.interscience.wiley.com/cgi-bin/abstract?ID=5385;
    http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=5385&PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency, practice and experience},
}
@InProceedings{Alexandrov:1999:PMC,
  author = {V. Alexandrov and A. Karaivanova},
  title = {Parallel {Monte Carlo} algorithms for sparse {SLAE}
    using {MPI}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {283--290},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Article{Anonymous:1999:BRMa,
  author = {Anonymous},
  title = {Book Review: {{\booktitle{MPI --- The complete
    reference: Volume 1, the MPI core}}, second edition: By
    Marc Snir, Steve Otto, Steven Huss-Lederman, David
    Walker and Jack Dongarra. MIT Press, Cambridge, MA.
    (1998). 426 pages. \$35.00}},
  journal = j-COMPUT-MATH-APPL,
  volume = {37},
  number = {3},
  pages = {130--130},
  month = feb,
  year = {1999},
  CODEN = {CMAPDK},
  ISSN = {0898-1221 (print), 1873-7668 (electronic)},
  ISSN-L = {0898-1221},
  bibdate = {Wed Mar 1 21:48:57 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0898122199903590},
  acknowledgement = ack-nhfb,
  fjournal = {Computers and Mathematics with Applications},
  journal-URL = {http://www.sciencedirect.com/science/journal/08981221},
}
%%% Unsigned review of volume 2 of the MPI complete reference, in
%%% j-COMPUT-MATH-APPL 37(3).  The dash in the reviewed book's title is
%%% written as `---', matching the companion review of volume 1.
@Article{Anonymous:1999:BRMb,
author = "Anonymous",
title = "Book Review: {{\booktitle{MPI --- The complete
reference: Volume 2, the MPI-2 extensions}}: By William
Gropp, Steven Huss-Lederman, Andrew Lumsdaine, Ewing Lusk,
Bill Nitzberg, William Saphir and Marc Snir. MIT Press,
Cambridge, MA. (1998). 344 pages. \$35.00}",
journal = j-COMPUT-MATH-APPL,
volume = "37",
number = "3",
pages = "130--130",
month = feb,
year = "1999",
CODEN = "CMAPDK",
ISSN = "0898-1221 (print), 1873-7668 (electronic)",
ISSN-L = "0898-1221",
bibdate = "Wed Mar 1 21:48:57 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0898122199903619",
acknowledgement = ack-nhfb,
fjournal = "Computers and Mathematics with Applications",
journal-URL = "http://www.sciencedirect.com/science/journal/08981221",
}
@Article{Anonymous:1999:BRMf,
  author = {Anonymous},
  title = {Book Review: {{\booktitle{MPI --- The complete
    reference: Volume 1, the MPI core}}, second edition: By
    Marc Snir, Steve Otto, Steven Huss-Lederman, David
    Walker and Jack Dongarra. MIT Press, Cambridge, MA
    (1998). 426 pages. \$35.00}},
  journal = j-COMPUT-MATH-APPL,
  volume = {37},
  number = {6},
  pages = {130--130},
  month = mar,
  year = {1999},
  CODEN = {CMAPDK},
  ISSN = {0898-1221 (print), 1873-7668 (electronic)},
  ISSN-L = {0898-1221},
  bibdate = {Wed Mar 1 21:48:58 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0898122199902237},
  acknowledgement = ack-nhfb,
  fjournal = {Computers and Mathematics with Applications},
  journal-URL = {http://www.sciencedirect.com/science/journal/08981221},
}
%%% Unsigned review of volume 2 of the MPI complete reference, in
%%% j-COMPUT-MATH-APPL 37(6).  The dash in the reviewed book's title is
%%% written as `---', matching the companion review of volume 1.
@Article{Anonymous:1999:BRMg,
author = "Anonymous",
title = "Book Review: {{\booktitle{MPI --- The complete
reference: Volume 2, the MPI-2 extensions}}: By William
Gropp, Steven Huss-Lederman, Andrew Lumsdaine, Ewing Lusk,
Bill Nitzberg, William Saphir and Marc Snir. MIT Press,
Cambridge, MA. (1998). 344 pages. \$35.00}",
journal = j-COMPUT-MATH-APPL,
volume = "37",
number = "6",
pages = "130--130",
month = mar,
year = "1999",
CODEN = "CMAPDK",
ISSN = "0898-1221 (print), 1873-7668 (electronic)",
ISSN-L = "0898-1221",
bibdate = "Wed Mar 1 21:48:58 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computmathappl1990.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0898122199902250",
acknowledgement = ack-nhfb,
fjournal = "Computers and Mathematics with Applications",
journal-URL = "http://www.sciencedirect.com/science/journal/08981221",
}
@InProceedings{Asai:1999:MIF,
  author = {Noboru Asai and Thomas Kentemich and Pierre Lagier},
  title = {{MPI-2} Implementation on a {Fujitsu Generic Message
    Passing Kernel}},
  crossref = {ACM:1999:SPO},
  pages = {??--??},
  year = {1999},
  bibdate = {Thu Feb 24 09:02:57 2000},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sc99.org/techpapers/},
  acknowledgement = ack-nhfb,
}
@InProceedings{Ayguade:1999:EML,
  author = {E. Ayguade and X. Martorell and J. Labarta and M.
    Gonzalez and N. Navarro},
  editor = {????},
  booktitle = {{Proceedings of the 1999 International Conference on
    Parallel Processing}},
  title = {Exploiting multiple levels of parallelism in {OpenMP}:
    a case study},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {172--180},
  year = {1999},
  bibdate = {Mon Oct 07 08:57:41 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Badia:1999:SIT,
  author = {J. M. Badia and A. M. Vidal},
  title = {Solving the inverse {Toeplitz} eigenproblem using
    {ScaLAPACK} and {MPI}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {372--379},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Article{Baker:1999:MOO,
  author = {M. Baker and B. Carpenter and G. Fox and Sung Hoon
    Koo},
  title = {{mpiJava}: An Object-Oriented {Java} Interface to
    {MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1586},
  pages = {748--??},
  year = {1999},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Mon Sep 13 16:57:02 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs1999a.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@InProceedings{Baraglia:1999:PAN,
  author = {R. Baraglia and R. Ferrini and D. Laforenza and A.
    Lagana},
  title = {Parallel approaches to a numerically intensive
    application using {PVM}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {364--371},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Article{Barbosa:1999:ADM,
  author = {J. Barbosa and A. Padilha},
  title = {Algorithm-Dependant Method to Determine the Optimal
    Number of Computers in Parallel Virtual Machines},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1573},
  pages = {508--521},
  year = {1999},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Tue Sep 14 06:09:05 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs1999a.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
  keywords = {parallel processing; VECPAR; vector processing},
}
%%% j-IJHPCA article.  fjournal and journal-URL carry the same values as
%%% the other j-IJHPCA entries in this file, so that tools keyed on those
%%% fields treat every record of the journal alike.
@Article{Barnard:1999:MIS,
author = "Stephen T. Barnard and Luis M. Bernardo and Horst D.
Simon",
title = "An {MPI} Implementation of the {SPAI} Preconditioner
on the {T3E}",
journal = j-IJHPCA,
volume = "13",
number = "2",
pages = "107--123",
month = "Summer",
year = "1999",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri May 21 13:56:09 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "https://journals.sagepub.com/home/hpc",
}
@InProceedings{Bassomo:1999:PGE,
  author = {P. Bassomo and I. Sakho and A. Corbel},
  title = {Porting generalized eigenvalue software on distributed
    memory machines using systolic model principles},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {396--403},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Article{Bernaschi:1999:ERA,
  author = {M. Bernaschi and G. Iannello and M. Lauria},
  title = {Experimental Results about {MPI} Collective
    Communication Operations},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1593},
  pages = {774--??},
  year = {1999},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Mon Sep 13 16:57:02 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs1999a.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@InProceedings{Bertozzi:1999:MIT,
  author = {M. Bertozzi and F. Boselli and G. Conte and M.
    Reggiani},
  title = {An {MPI} implementation on the top of the virtual
    interface architecture},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {199--206},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Beyls:1999:JJP,
  author = {K. Beyls and E. D'Hollander and Y. Yu},
  title = {{JPT}: a {Java} parallelization tool},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {173--180},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Blaheta:1999:LFM,
  author = {R. Blaheta and O. Jakl and J. Stary},
  title = {Large-scale {FE} modelling in geomechanics: a case
    study in parallelization},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {299--306},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Borkowski:1999:LVC,
  author = {J. Borkowski},
  title = {On line visualization or combining the standard {ORNL
    PVM} with a vendor {PVM} implementation},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {157--164},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Boudet:1999:PIH,
  author = {V. Boudet and F. Rastello and Y. Robert},
  title = {{PVM} implementation of heterogeneous {ScaLAPACK}
    dense linear solvers},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {333--340},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
%%% Conference paper from the ISCA 12th International Conference
%%% proceedings.  The title carries no terminal period: bibliography
%%% styles append their own punctuation after the title field.
@InProceedings{Bova:1999:NOM,
author = "S. W. Bova and C. P. Breshears and C. Cuicchi and Z.
Demirbilek and H. Gabb",
editor = "????",
booktitle = "{Proceedings of the ISCA 12th International
Conference. Parallel and Distributed Systems}",
title = "Nesting {OpenMP} in an {MPI} application",
publisher = "ISCA",
address = "Raleigh, NC, USA",
pages = "566--571",
year = "1999",
bibdate = "Mon Oct 07 09:02:21 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Bova:1999:PPM,
  author = {Steve W. Bova and Clay P. Breshears and Henry Gabb and
    Rudolf Eigenmann and Greg Gaertner and Bob Kuhn and
    Bill Magro and Stefano Salvini},
  title = {Parallel Programming with Message Passing and
    Directives},
  journal = j-SIAM-NEWS,
  volume = {32},
  number = {9},
  pages = {??--??},
  month = nov,
  year = {1999},
  ISSN = {0036-1437},
  bibdate = {Mon Oct 07 09:13:31 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {SIAM News},
  journal-URL = {http://www.siam.org/news/},
}
@InProceedings{Bubak:1999:EFP,
  author = {M. Bubak and W. Funika and K. Iskra and R.
    Maruszewski},
  title = {Enhancing the functionality of performance measurement
    tools for message passing environments},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {67--74},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Bubak:1999:TPR,
  author = {M. Bubak and P. Luszczek},
  title = {Towards portable runtime support for irregular and
    out-of-core computations},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {59--66},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Article{Cappello:1999:PNB,
  author = {F. Cappello and O. Richard and D. Etiemble},
  title = {Performance of the {NAS} Benchmarks on a Cluster of
    {SMP PCs} Using a Parallelization of the {MPI} Programs
    with {OpenMP}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1662},
  pages = {339--350},
  year = {1999},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Mon Sep 13 16:57:02 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs1999b.bib;
    http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Cerin:1999:DMP,
  author = {C. Cerin},
  title = {Differentiating Message Passing Interface and Bulk
    Synchronous Parallel Computation Models},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1662},
  pages = {477--??},
  year = {1999},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Mon Sep 13 16:57:02 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs1999b.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@InProceedings{Chapman:1999:EOF,
  author = {B. Chapman and P. Mehrotra and H. Zima},
  editor = {????},
  booktitle = {{Proceedings of Eighth ECMWF Workshop on the Use of
    Parallel Processors in Meteorology. Towards
    Teracomputing}},
  title = {Enhancing {OpenMP} with features for locality
    control},
  publisher = pub-WORLD-SCI,
  address = pub-WORLD-SCI:adr,
  pages = {301--313},
  year = {1999},
  bibdate = {Mon Oct 07 09:10:58 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
%%% Paper in the Dongarra:1999:RAP proceedings.  The system name in the
%%% title is MPL* (MPL-star); it is braced so that sentence-casing styles
%%% leave it untouched.
@InProceedings{ChassindeKergommeaux:1999:MER,
author = "J. {Chassin de Kergommeaux} and M. Ronsse and K. {De
Bosschere}",
title = "{MPL*}: {Efficient} record\slash replay of
nondeterministic features of message passing
libraries",
crossref = "Dongarra:1999:RAP",
number = "1697",
pages = "141--148",
year = "1999",
bibdate = "Thu Dec 9 06:08:35 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Chaussumier:1999:ACM,
  author = {F. Chaussumier and F. Desprez and L. Prylli},
  title = {Asynchronous communications in {MPI} --- The
    {BIP\slash Myrinet} approach},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {485--492},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Chergui:1999:UPP,
  author = {J. Chergui},
  title = {Using {PMD} to parallel solve large-scale
    {Navier--Stokes} equations. Performance analysis on
    {SGI\slash CRAY-T3E} machine},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {341--348},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
%%% j-IJHPCA article.  fjournal and journal-URL carry the same values as
%%% the other j-IJHPCA entries in this file, so that tools keyed on those
%%% fields treat every record of the journal alike.
@Article{Chien:1999:DEH,
author = "A. Chien and M. Lauria and R. Pennington and M.
Showerman and G. Iannello and M. Buchanan and K.
Connelly and L. Giannini and G. Koenig and S.
Krishnamurthy and Q. Liu and S. Pakin and G.
Sampemane",
title = "Design and Evaluation of an {HPVM}-Based {Windows NT}
Supercomputer",
journal = j-IJHPCA,
volume = "13",
number = "3",
pages = "201--219",
month = "Fall",
year = "1999",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Wed Jul 28 14:14:38 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "https://journals.sagepub.com/home/hpc",
}
@inproceedings{Ciegis:1999:HDA,
  author = {R. Ciegis and R. Sablinskas and J. Wasniewski},
  title = {Hyper-Rectangle distribution algorithm for parallel
    multidimensional numerical integration},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {275--282},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Claver:1999:PCS,
  author = {J. M. Claver and M. Mollar and V. Hernandez},
  title = {Parallel computation of the {SVD} of a matrix product},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {388--395},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Clematis:1999:EPC,
  author = {A. Clematis and V. Gianuzzi},
  title = {Extending {PVM} with consistent cut capabilities: {Application}
    aspects and implementation strategies},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {101--108},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Corbacho-Lozano:1999:EDD,
  author = {J. Corbacho-Lozano and O.-I. Lepe-Aldama and J. Sole-Pareta and
    J. Domingo-Pascual},
  title = {Experiences deploying a distributed parallel processing
    environment over a broadband multiservice network},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {477--484},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@article{Cormen:1999:PBP,
  author = {Thomas H. Cormen and James C. Clippinger},
  title = {Performing {BMMC} Permutations Efficiently on Distributed-Memory
    Multiprocessors with {MPI}},
  journal = j-ALGORITHMICA,
  volume = {24},
  number = {3--4},
  pages = {349--370},
  month = aug,
  year = {1999},
  coden = {ALGOEJ},
  issn = {0178-4617 (print), 1432-0541 (electronic)},
  issn-l = {0178-4617},
  mrclass = {68Q22},
  mrnumber = {MR1687275},
  bibdate = {Fri Jan 6 11:38:11 MST 2006},
  bibsource = {dblp-journals-algorithmica.bib;
    http://dblp.uni-trier.de/db/journals/algorithmica/algorithmica24.html#CormenC99;
    http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0178-4617&volume=24&issue=3;
    http://www.math.utah.edu/pub/tex/bib/index-table-a.html#algorithmica;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    MathSciNet database},
  url = {http://link.springer.de/link/service/journals/00453/bibs/24n3p349.html;
    http://www.springerlink.com/openurl.asp?genre=article&issn=0178-4617&volume=24&issue=3&spage=349},
  acknowledgement = ack-nhfb,
  fjournal = {Algorithmica. An International Journal in Computer Science},
  journal-url = {http://link.springer.com/journal/453},
  oldlabel = {CormenC99},
  xmldata = {ftp://ftp.informatik.uni-trier.de/pub/users/Ley/bib/records.tar.gz#journals/algorithmica/CormenC99},
}
@inproceedings{Cownie:1999:SID,
  author = {J. Cownie and W. Gropp},
  title = {A standard interface for debugger access to message queue
    information in {MPI}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {51--58},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Czarnul:1999:DAP,
  author = {P. Czarnul and H. Krawczyk},
  title = {Dynamic assignment with process migration in distributed
    environments},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {509--516},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
%%% The source lists these Chinese names family-name-first.  The comma
%%% form makes BibTeX treat Pei, Wang, Zhang, and Shen as the surnames.
%%% The citation key keeps its historical form so existing citations
%%% still resolve.  NOTE(review): confirm surname order with the paper.
@Article{Dan:1999:QAM,
author = "Pei, Dan and Wang, Dongsheng and Zhang, Youhui and
Shen, Meiming",
title = "Quasi-asynchronous migration: a novel migration
protocol for {PVM} tasks",
journal = j-OPER-SYS-REV,
volume = "33",
number = "2",
pages = "5--14",
month = apr,
year = "1999",
CODEN = "OSRED8",
ISSN = "0163-5980 (print), 1943-586X (electronic)",
ISSN-L = "0163-5980",
bibdate = "Sat Aug 26 08:55:42 MDT 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Operating Systems Review",
}
@inproceedings{DeSande:1999:NBS,
  author = {F. {De Sande} and C. Leon and C. Rodriguez and J. Roda},
  title = {Nested bulk synchronous parallel computing},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {189--198},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Eberl:1999:PCP,
  author = {M. Eberl and W. Karl and C. Trinitis and A. Blaszczyk},
  title = {Parallel computing on {PC} clusters --- an alternative to
    supercomputers for industrial applications},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {493--498},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Eickermann:1999:PID,
  author = {T. Eickermann and H. Grund and J. Henrichs},
  title = {Performance issues of distributed {MPI} applications in a
    {German} gigabit testbed},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {3--10},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Espinosa:1999:REB,
  author = {A. Espinosa and F. Parcerisa and T. Margalef and E. Luque},
  title = {Relating the execution behaviour with the structure of the
    application},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {91--100},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@article{Fang:1999:PMD,
  author = {Zhiwu Fang and A. D. J. Haymet and Wataru Shinoda and Susumu
    Okazaki},
  title = {Parallel molecular dynamics simulation: Implementation of {PVM}
    for a lipid membrane},
  journal = j-COMP-PHYS-COMM,
  volume = {116},
  number = {2--3},
  pages = {295--310},
  month = feb,
  year = {1999},
  coden = {CPHCBZ},
  doi = {https://doi.org/10.1016/S0010-4655(98)00089-7},
  issn = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l = {0010-4655},
  bibdate = {Mon Feb 13 21:30:34 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm1990.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0010465598000897},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-url = {http://www.sciencedirect.com/science/journal/00104655},
}
@inproceedings{Fava:1999:MPI,
  author = {A. Fava and M. Fava and M. Bertozzi},
  title = {{MPIPOV}: a parallel implementation of {POV-Ray} based on {MPI}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {426--433},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Ferenc:1999:VMK,
  author = {D. Ferenc and J. Nabrzyski and M. Stroinski and P. Wierzejewski},
  title = {Visual {MPI}, a knowledge-based system for writing efficient
    {MPI} applications},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {257--266},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Fernandez:1999:PGP,
  author = {F. Fernandez and J. M. Sanchez and M. Tomassini and J. A. Gomez},
  title = {A parallel genetic programming tool based on {PVM}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {241--248},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Frugoli:1999:DCH,
  author = {G. Frugoli and A. Fava and E. Fava and G. Conte},
  title = {Distributed collision handling for particle-based simulation},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {410--417},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Gallud:1999:CCU,
  author = {J. A. Gallud and J. M. Garcia and J. Garcia-Consuegra},
  title = {Cluster computing using {MPI} and {Windows NT} to solve the
    processing of remotely sensed imagery},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {442--449},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@article{Gallud:1999:DPR,
  author = {J. A. Gallud and J. Garcia-Consuegra and A. Martinez},
  title = {Distributed Processing of Remotely Sensed {Landsat-TM} Imagery
    Using {MPI}},
  journal = j-PARALLEL-DIST-COMP-PRACT,
  volume = {2},
  number = {2},
  pages = {??--??},
  month = {????},
  year = {1999},
  coden = {????},
  issn = {1097-2803},
  bibdate = {Fri Dec 19 08:14:13 MST 2003},
  bibsource = {http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no2.html;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no2abs.html#gallud},
  acknowledgement = ack-nhfb,
  fjournal = {PDCP: Parallel and Distributed Computing Practices},
}
@inproceedings{Garcia:1999:MMI,
  author = {F. Garcia and A. Calderon and J. Carretero},
  title = {{MiMPI}: a multithread-safe implementation of {MPI}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {207--214},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Garzon:1999:PIE,
  author = {E. M. Garzon and I. Garcia},
  title = {A parallel implementation of the eigenproblem for large,
    symmetric and sparse matrices},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {380--387},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Getov:1999:MJM,
  author = {Vladimir Getov and Paul Gray and Vaidy Sunderam},
  title = {{MPI} and {Java-MPI}: Contrasts and Comparisons of Low-level
    Communication Performance},
  crossref = {ACM:1999:SPO},
  pages = {??--??},
  year = {1999},
  bibdate = {Thu Feb 24 09:02:57 2000},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sc99.org/techpapers/},
  acknowledgement = ack-nhfb,
}
@article{Giordano:1999:IBP,
  author = {M. Giordano and M. M. Furnari and F. Vitobello},
  title = {Interaction between {PVM} Parameters and Communication
    Performances on {ATM} Networks},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1557},
  pages = {586--587},
  year = {1999},
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Tue Sep 14 06:09:05 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs1999a.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
  keywords = {image processing; multimedia; parallel computation; parallel
    computing; parallel numerics; ParNum},
}
@inproceedings{Godlevsky:1999:PSA,
  author = {A. Godlevsky and M. Gazak and L. Hluchy},
  title = {Parallelizing of sequential annotated programs in {PVM}
    environment},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {517--524},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@article{Golebiewski:1999:HPI,
  author = {M. Golebiewski and M. Baum and R. Hempel},
  title = {High Performance Implementation of {MPI} for {Myrinet}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1557},
  pages = {510--521},
  year = {1999},
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Tue Sep 14 06:09:05 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs1999a.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
  keywords = {image processing; multimedia; parallel computation; parallel
    computing; parallel numerics; ParNum},
}
@inproceedings{Gonzalez:1999:PPM,
  author = {J. A. Gonzalez and C. Rodriguez and J. L. Roda and D. G. Morales},
  title = {Performance and predictability of {MPI} and {BSP} programs on
    the {CRAY T3E}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {27--34},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Gropp:1999:RMM,
  author = {W. Gropp and E. Lusk},
  title = {Reproducible measurements of {MPI} performance characteristics},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {11--18},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@book{Gropp:1999:UMA,
  author = {William Gropp and Ewing Lusk and Rajeev Thakur},
  title = {Using {MPI-2}: Advanced Features of the {Message Passing
    Interface}},
  publisher = pub-MIT,
  address = pub-MIT:adr,
  pages = {275},
  month = nov,
  year = {1999},
  isbn = {0-262-57133-1},
  isbn-13 = {978-0-262-57133-3},
  lccn = {QA76.642 .G762 1999},
  bibdate = {Fri Feb 01 06:52:50 2002},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  price = {US\$32.50},
  series = {Scientific and Engineering Computation},
  url = {http://www.mitpress.com/book-home.tcl?isbn=0262571331},
  acknowledgement = ack-nhfb,
}
@book{Gropp:1999:UMP,
  author = {William Gropp and Ewing Lusk and Anthony Skjellum},
  title = {Using {MPI}: Portable Parallel Programming with the {Message
    Passing Interface}},
  publisher = pub-MIT,
  address = pub-MIT:adr,
  edition = {Second},
  pages = {350},
  month = nov,
  year = {1999},
  isbn = {0-262-57132-3 (vol. 1), 0-262-57134-X (set)},
  isbn-13 = {978-0-262-57132-6 (vol. 1), 978-0-262-57134-0 (set)},
  lccn = {QA76.642.G76 1999},
  bibdate = {Mon Sep 20 05:54:39 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  price = {US\$32.50},
  series = {Scientific and Engineering Computation},
  url = {http://www.mitpress.com/book-home.tcl?isbn=0262571323},
  acknowledgement = ack-nhfb,
}
@article{Hempel:1999:AMP,
  author = {Rolf Hempel and Falk Zimmermann},
  title = {Automatic migration from {PARMACS} to {MPI} in parallel
    {Fortran} applications},
  journal = j-SCI-PROG,
  volume = {7},
  number = {1},
  pages = {39--46},
  month = {????},
  year = {1999},
  coden = {SCIPEV},
  issn = {1058-9244 (print), 1875-919X (electronic)},
  issn-l = {1058-9244},
  bibdate = {Thu Mar 28 12:27:27 MST 2002},
  bibsource = {Compendex database;
    http://www.iospress.nl/site/html/10589244.html;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    OCLC Article1st database},
  url = {http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=64cr5a4mg33tuhcbdr02%26referrer=parent%26backto=issue%2C3%2C7%3Bjournal%2C8%2C9%3Blinkingpublicationresults%2C1%2C1},
  acknowledgement = ack-nhfb,
  fjournal = {Scientific Programming},
  journal-url = {http://iospress.metapress.com/content/1058-9244},
}
@inproceedings{Hidalgo:1999:MMP,
  author = {J. I. Hidalgo and M. Prieto and J. Lanchares and F. Tirado},
  title = {A method for model parameter identification using parallel
    genetic algorithms},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {291--298},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Hluchy:1999:GWF,
  author = {L. Hluchy and V. D. Tran and L. Halada and M. Dobrucky},
  title = {Ground water flow modelling in {PVM}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {450--460},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Huse:1999:CCD,
  author = {L. P. Huse},
  title = {Collective communication on dedicated clusters of workstations},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {469--476},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Ishihara:1999:VBS,
  author = {S. Ishihara and S. Tani and A. Takahara},
  title = {Virtual {BUS}: a simple implementation of an effortless
    networking system based on {PVM}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {461--468},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@article{Kielmann:1999:MMC,
  author = {Thilo Kielmann and Rutger F. H. Hofman and Henri E. Bal and
    Aske Plaat and Raoul A. F. Bhoedjang},
  title = {{MagPIe}: {MPI}'s collective communication operations for
    clustered wide area systems},
  journal = j-SIGPLAN,
  volume = {34},
  number = {8},
  pages = {131--140},
  month = aug,
  year = {1999},
  coden = {SINODQ},
  issn = {0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)},
  issn-l = {0362-1340},
  bibdate = {Sun Dec 14 09:18:06 MST 2003},
  bibsource = {http://www.acm.org/pubs/contents/proceedings/ppopp/301104/;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.acm.org/pubs/citations/proceedings/ppopp/301104/p131-kielmann/},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIG{\-}PLAN Notices},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
}
@article{Koholka:1999:MPR,
  author = {R. Koholka and H. Mayer and A. Goller},
  title = {{MPI}-parallelized Radiance on {SGI CoW} and {SMP}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1557},
  pages = {549--558},
  year = {1999},
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Tue Sep 14 06:09:05 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs1999a.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
  keywords = {image processing; multimedia; parallel computation; parallel
    computing; parallel numerics; ParNum},
}
@inproceedings{Kranzlmueller:1999:MOM,
  author = {D. Kranzlmueller and R. Reussner and C. Schaubschlaeger},
  title = {Monitor overhead measurement with {SKaMPI}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {43--50},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Larsen:1999:SPG,
  author = {M. Larsen and P. Madsen},
  title = {A scalable parallel {Gauss--Seidel} and {Jacobi} solver for
    animal genetics},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {356--363},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@article{Lee:1999:PEJ,
  author = {Bu-Sung Lee and Yan Gu and Wentong Cai and Alfred Heng},
  title = {Performance Evaluation of {JPVM}},
  journal = j-PARALLEL-PROCESS-LETT,
  volume = {9},
  number = {3},
  pages = {401--??},
  month = sep,
  year = {1999},
  coden = {PPLTEE},
  issn = {0129-6264 (print), 1793-642X (electronic)},
  bibdate = {Thu Jan 6 12:02:35 MST 2005},
  bibsource = {http://ejournals.wspc.com.sg/ppl/;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Parallel Processing Letters},
  journal-url = {http://www.worldscientific.com/loi/ppl},
}
@article{Luo:1999:SMV,
  author = {Yong Luo},
  title = {Shared Memory vs. Message Passing: The {COMOPS} Benchmark
    Experiment},
  journal = j-J-SUPERCOMPUTING,
  volume = {13},
  number = {3},
  pages = {283--301},
  month = may,
  year = {1999},
  coden = {JOSUED},
  doi = {https://doi.org/10.1023/A:1008009103962},
  issn = {0920-8542 (print), 1573-0484 (electronic)},
  issn-l = {0920-8542},
  bibdate = {Wed Jul 6 12:13:10 MDT 2005},
  bibsource = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=13&issue=3;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.wkap.nl/issuetoc.htm/0920-8542+13+3+1999},
  url = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=13&issue=3&spage=283;
    http://www.wkap.nl/oasis.htm/206582},
  acknowledgement = ack-nhfb,
  fjournal = {The Journal of Supercomputing},
  journal-url = {http://link.springer.com/journal/11227},
  keywords = {distributed computing; message passing; MPI; performance
    evaluation; shared memory},
}
@inproceedings{MacFarlane:1999:PPI,
  author = {A. MacFarlane and J. A. McCann and S. E. Robertson},
  title = {{PLIERS}: a parallel information retrieval system using {MPI}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {317--324},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Matuszek:1999:BPG,
  author = {M. R. Matuszek and A. Mazurkiewicz and P. W. Uminski},
  title = {Benchmarking the {PVM} group communication efficiency},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {499--508},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Mierendorff:1999:PMB,
  author = {H. Mierendorff and H. Schwamborn},
  title = {Performance modeling based on {PVM}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {75--82},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Migliardi:1999:PEH,
  author = {M. Migliardi and V. Sunderam},
  title = {{PVM} emulation in the harness metacomputing system: a plug-in
    based approach},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {117--124},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Morimoto:1999:PEM,
  author = {K. Morimoto and T. Matsumoto and K. Hiraki},
  title = {Performance evaluation of the {MPI\slash MBCF} with the {NAS}
    parallel benchmarks},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {19--26},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Morrison:1999:FPP,
  author = {J. P. Morrison and R. W. Connolly},
  title = {Facilitating parallel programming in {PVM} using condensed
    graphs},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {181--188},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Mourao:1999:IMO,
  author = {F. E. Mourao and J. G. Silva},
  title = {Implementing {MPI}'s one-sided communications for {WMPI}},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {231--240},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Neyman:1999:ERP,
  author = {M. Neyman and M. Bukowski and P. Kuzora},
  title = {Efficient replay of {PVM} programs},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {83--90},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Nicolescu:1999:PWA,
  author = {C. Nicolescu and B. Albers and P. Jonker},
  title = {Parallel watershed algorithm on images from cranial {CT-scans}
    using {PVM} and {MPI} on a distributed memory system},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {418--425},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Papagapiou:1999:NWD,
  author = {A. Papagapiou and P. Evripidou and G. Samaras},
  title = {{Net-Console}: a {Web}-based development environment for {MPI}
    programs},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {249--256},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
%%% The first author's surname is Parrilla.  The citation key keeps the
%%% historical misspelling so that existing citations still resolve.
%%% NOTE(review): confirm the spelling against the LNCS 1697 contents.
@InProceedings{Parrilia:1999:UPD,
author = "L. Parrilla and J. Ortega and A. Lloris",
title = "Using {PVM} for distributed logic minimization in a
network of computers",
crossref = "Dongarra:1999:RAP",
number = "1697",
pages = "541--548",
year = "1999",
bibdate = "Thu Dec 9 06:08:35 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@article{Pereira:1999:PBI,
  author = {N. S. A. Pereira},
  title = {A Parallel {$N$}-body Integrator Using {MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1573},
  pages = {627--639},
  year = {1999},
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Tue Sep 14 06:09:05 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs1999a.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
  keywords = {parallel processing; VECPAR; vector processing},
}
@inproceedings{Plazek:1999:IIC,
  author = {J. Plazek and K. Banas and J. Kitowski},
  title = {Implementation issues of computational fluid dynamics algorithms
    on parallel computers},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {349--355},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Prieto:1999:PRM,
  author = {M. Prieto and R. Santiago and I. M. Llorente and F. Tirado},
  title = {A parallel robust multigrid algorithm based on semi-coarsening},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {307--316},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Prylli:1999:DHP,
  author = {L. Prylli and B. Tourancheau and R. Westrelin},
  title = {The design for a high performance {MPI} implementation on the
    {Myrinet} network},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {223--230},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@inproceedings{Rabenseifner:1999:APM,
  author = {R. Rabenseifner},
  title = {Automatic profiling of {MPI} applications with hardware
    performance counters},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {35--42},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@article{Radhakrishna:1999:MBP,
  author = {H. Radhakrishna and S. Divakar and N. Magotra and S. R. J.
    Brueck},
  title = {{MPI}-Based Parallel Implementation of a Lithography Pattern
    Simulation Algorithm},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1593},
  pages = {109--??},
  year = {1999},
  coden = {LNCSD9},
  issn = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l = {0302-9743},
  bibdate = {Mon Sep 13 16:57:02 MDT 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs1999a.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
%%% The API name is spelled {OpenMP}, matching its other uses in this
%%% bibliography; the braces keep that casing under recasing styles.
@TechReport{Roe:1999:PMI,
author = "Kevin Roe and Piyush Mehrotra",
title = "Parallelization of a multigrid incompressible viscous
cavity flow solver using {OpenMP}",
type = "{NASA} contractor report",
number = "NASA\slash CR-1999-209551",
institution = inst-NLRC,
address = inst-NLRC:adr,
pages = "????",
year = "1999",
bibdate = "Thu Mar 16 07:20:02 2000",
bibsource = "http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Also ICASE report 99-36.",
acknowledgement = ack-nhfb,
keywords = "cavity flow; incompressible flow; multigrid methods;
two dimensional flow; viscous flow",
}
@inproceedings{Rungsawang:1999:PDT,
  author = {A. Rungsawang and A. Tangpong and P. Laohawee},
  title = {Parallel {DSIR} text retrieval system},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {325--332},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@article{Russ:1999:UHR,
  author = {Samuel H. Russ and Jonathan Robinson and Matt Gleeson and Brad
    Meyers and Laxman Rajagopalan and Chun-Heong Tan},
  title = {Using {Hector} to run {MPI} programs over networked
    workstations},
  journal = j-CPE,
  volume = {11},
  number = {4},
  pages = {189--204},
  day = {10},
  month = apr,
  year = {1999},
  coden = {CPEXEI},
  issn = {1040-3108},
  issn-l = {1040-3108},
  bibdate = {Tue Sep 7 06:06:48 MDT 1999},
  bibsource = {http://www.interscience.wiley.com/jpages/1040-3108/;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www3.interscience.wiley.com/journalfinder.html},
  note = {Special Issue: Applications of Distributed Computing
    Environments.},
  url = {http://www3.interscience.wiley.com/cgi-bin/abstract?ID=61004080;
    http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=61004080&PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency, practice and experience},
}
@article{Scherer:1999:TAP,
  author = {Alex Scherer and Honghui Lu and Thomas Gross and Willy
    Zwaenepoel},
  title = {Transparent adaptive parallelism on {NOWs} using {OpenMP}},
  journal = j-SIGPLAN,
  volume = {34},
  number = {8},
  pages = {96--106},
  month = aug,
  year = {1999},
  coden = {SINODQ},
  issn = {0362-1340 (print), 1523-2867 (print), 1558-1160 (electronic)},
  issn-l = {0362-1340},
  bibdate = {Sun Dec 14 09:18:06 MST 2003},
  bibsource = {http://www.acm.org/pubs/contents/proceedings/ppopp/301104/;
    http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan1990.bib},
  url = {http://www.acm.org/pubs/citations/proceedings/ppopp/301104/p96-scherer/},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIG{\-}PLAN Notices},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
}
@inproceedings{Schuele:1999:HAP,
  author = {J. Schuele},
  title = {Heading for an asynchronous parallel ocean model},
  crossref = {Dongarra:1999:RAP},
  number = {1697},
  pages = {404--409},
  year = {1999},
  bibdate = {Thu Dec 9 06:08:35 MST 1999},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
%%% The words "Interface Architecture" and "Message Passing" were fused
%%% in the German title and in its English gloss; the spaces are restored.
@MastersThesis{Seifert:1999:ESI,
author = "Friedrich Seifert",
title = "{Entwicklung von Systemsoftware zur Integration der
Virtual Interface Architecture (VIA) in den Linux
Betriebssystemkern f{\"u}r optimiertes Message Passing}.
({German}) [{Development} of system software for
integration of the {Virtual Interface Architecture
(VIA)} in the {Linux} operating system for optimized
message passing]",
type = "{Diplomarbeit}",
school = "Technische Universit{\"a}t Chemnitz-Zwickau",
address = "Chemnitz, Germany",
pages = "115",
year = "1999",
bibdate = "Wed Aug 27 06:25:09 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
language = "German",
}
@article{Sen:1999:PBD,
  author = {Vikramaditya Sen and Mrinal K. Sen and Paul L. Stoffa},
  title = {{PVM} based {$3$-D Kirchhoff} depth migration using dynamically
    computed travel-times: an application in seismic data processing},
  journal = j-PARALLEL-COMPUTING,
  volume = {25},
  number = {3},
  pages = {231--248},
  day = {22},
  month = mar,
  year = {1999},
  coden = {PACOEJ},
  issn = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l = {0167-8191},
  bibdate = {Fri Aug 6 10:16:02 MDT 1999},
  bibsource = {http://www.elsevier.com/cgi-bin/cas/tree/store/parco/cas_free/browse/browse.cgi?year=1999&volume=25&issue=3;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.elsevier.com/cas/tree/store/parco/sub/1999/25/3/1389.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Parallel Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/01678191},
}
@InProceedings{Shen:1999:ATL,
author = "Kai Shen and Hong Tang and Tao Yang",
title = "Adaptive Two-level Thread Management for Fast {MPI}
Execution on Shared Memory Machines",
crossref = "ACM:1999:SPO",
pages = "??--??",
year = "1999",
bibdate = "Thu Feb 24 09:02:57 2000",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sc99.org/techpapers/",
acknowledgement = ack-nhfb,
}
@Article{Sidonio:1999:PBI,
author = "N. Sidonio and A. Pereira",
title = "A Parallel {$N$}-body Integrator Using {MPI}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1573",
pages = "627--??",
year = "1999",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Mon Sep 13 16:57:02 MDT 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs1999a.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@InProceedings{Silva:1999:DPP,
author = "F. Silva and H. Paulino and L. Lopes",
title = "{DipSystem}: a parallel programming system for
distributed memory architectures",
crossref = "Dongarra:1999:RAP",
number = "1697",
pages = "525--532",
year = "1999",
bibdate = "Thu Dec 9 06:08:35 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Silva:1999:IME,
author = "P. Silva and J. G. Silva",
title = "Implementing {MPI-2} extended collective operations",
crossref = "Dongarra:1999:RAP",
number = "1697",
pages = "125--132",
year = "1999",
bibdate = "Thu Dec 9 06:08:35 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Sistare:1999:MSP,
author = "Steve Sistare and Erica Dorenkamp and Nick Nevin",
title = "{MPI} Support in the {Prism} Programming Environment",
crossref = "ACM:1999:SPO",
pages = "??--??",
year = "1999",
bibdate = "Thu Feb 24 09:02:57 2000",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sc99.org/techpapers/",
acknowledgement = ack-nhfb,
}
@InProceedings{Sistare:1999:OMC,
author = "Steve Sistare and Rolf vandeVaart and Eugene Loh",
title = "Optimization of {MPI} Collectives on Clusters of
Large-scale {SMPs}",
crossref = "ACM:1999:SPO",
pages = "??--??",
year = "1999",
bibdate = "Thu Feb 24 09:02:57 2000",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sc99.org/techpapers/",
acknowledgement = ack-nhfb,
}
@InProceedings{Stankovic:1999:NVJ,
author = "N. Stankovic and K. Zhang",
title = "Native versus {Java} message passing",
crossref = "Dongarra:1999:RAP",
number = "1697",
pages = "165--172",
year = "1999",
bibdate = "Thu Dec 9 06:08:35 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Szeberenyi:1999:SGB,
author = "I. Szeberenyi and G. Domokos",
title = "Solving generalized boundary value problems with
distributed computing and recursive programming",
crossref = "Dongarra:1999:RAP",
number = "1697",
pages = "267--274",
year = "1999",
bibdate = "Thu Dec 9 06:08:35 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Takahashi:1999:IEM,
author = "T. Takahashi and F. O'Carroll and H. Tezuka and A.
Hori",
title = "Implementation and Evaluation of {MPI} on an {SMP}
Cluster",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1586",
pages = "1178--??",
year = "1999",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Mon Sep 13 16:57:02 MDT 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs1999a.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Tang:1999:CRT,
author = "Hong Tang and Kai Shen and Tao Yang",
title = "Compile\slash run-time support for threaded {MPI}
execution on multiprogrammed shared memory machines",
journal = j-SIGPLAN,
volume = "34",
number = "8",
pages = "107--118",
month = aug,
year = "1999",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Sun Dec 14 09:18:06 MST 2003",
bibsource = "http://www.acm.org/pubs/contents/proceedings/ppopp/301104/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.acm.org/pubs/citations/proceedings/ppopp/301104/p107-tang/",
acknowledgement = ack-nhfb,
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Throop:1999:SOS,
author = "Joe Throop",
title = "Standards: {OpenMP}: Shared-Memory Parallelism from
the Ashes",
journal = j-COMPUTER,
volume = "32",
number = "5",
pages = "108--109",
month = may,
year = "1999",
CODEN = "CPTRB4",
ISSN = "0018-9162 (print), 1558-0814 (electronic)",
ISSN-L = "0018-9162",
bibdate = "Thu May 6 06:17:23 MDT 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computer1990.bib;
http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://dlib.computer.org/co/books/co1999/pdf/r5108.pdf",
acknowledgement = ack-nhfb,
fjournal = "Computer",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2",
}
@InProceedings{Tourino:1999:MMC,
author = "J. Touri{\~n}o and R. Doallo",
title = "Modeling {MPI} collective communications on the
{AP3000 Multicomputer}",
crossref = "Dongarra:1999:RAP",
number = "1697",
pages = "133--140",
year = "1999",
bibdate = "Thu Dec 9 06:08:35 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Traeff:1999:FFE,
author = "J. L. Traeff and R. Hempel and H. Ritzdorf and F.
Zimmermann",
title = "Flattening on the fly: {Efficient} handling of {MPI}
derived datatypes",
crossref = "Dongarra:1999:RAP",
number = "1697",
pages = "109--116",
year = "1999",
bibdate = "Thu Dec 9 06:08:35 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Vazquez:1999:PNS,
author = "G. E. Vazquez and N. B. Brignole",
title = "Parallel {NLP} strategies using {PVM} on heterogeneous
distributed environments",
crossref = "Dongarra:1999:RAP",
number = "1697",
pages = "533--540",
year = "1999",
bibdate = "Thu Dec 9 06:08:35 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Wisniewski:1999:SME,
author = "Len Wisniewski and Brad Smisloff and Nils Nieuwejaar",
title = "{Sun MPI I/O}: Efficient {I/O} for Parallel
Applications",
crossref = "ACM:1999:SPO",
pages = "??--??",
year = "1999",
bibdate = "Thu Feb 24 09:02:57 2000",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sc99.org/techpapers/",
acknowledgement = ack-nhfb,
}
@InProceedings{Wong:1999:BMM,
author = "F. C. Wong and A. C. Arpaci-Dusseau and D. E. Culler",
title = "Building {MPI} for multi-programming systems using
implicit information",
crossref = "Dongarra:1999:RAP",
number = "1697",
pages = "215--222",
year = "1999",
bibdate = "Thu Dec 9 06:08:35 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Wu:1999:JBD,
author = "X. Wu and Q. Chen and X.-H. Sun",
title = "A {Java}-based Distributed Debugger Supporting {MPI}
and {PVM}",
journal = j-PARALLEL-DIST-COMP-PRACT,
volume = "2",
number = "4",
pages = "??--??",
month = "????",
year = "1999",
CODEN = "????",
ISSN = "1097-2803",
bibdate = "Fri Dec 19 08:14:14 MST 2003",
bibsource = "http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no4.html;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.cs.okstate.edu/~pdcp/vols/vol02/vol02no4abs.html#wu",
acknowledgement = ack-nhfb,
fjournal = "PDCP: Parallel and Distributed Computing Practices",
}
@InProceedings{Wu:1999:MCC,
author = "P.-Y. Wu",
title = "Minimum communication cost fractal image compression
on {PVM}",
crossref = "Dongarra:1999:RAP",
number = "1697",
pages = "434--441",
year = "1999",
bibdate = "Thu Dec 9 06:08:35 MST 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Zaki:1999:TSP,
author = "Omer Zaki and Ewing Lusk and William Gropp and Deborah
Swider",
title = "Toward Scalable Performance Visualization with
{Jumpshot}",
journal = j-IJHPCA,
volume = "13",
number = "3",
pages = "277--288",
month = "Fall",
year = "1999",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Wed Jul 28 14:14:38 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://hpc.sagepub.com/content/by/year",
keywords = "Java; MPI (Message Passing Interface) profiling",
}
@Article{Zoraja:1999:SPD,
author = "Ivan Zoraja and Hermann Hellwagner and Vaidy
Sunderam",
title = "{SCIPVM}: {Parallel} distributed computing on {SCI}
workstation clusters",
journal = j-CPE,
volume = "11",
number = "3",
pages = "121--138",
month = mar,
year = "1999",
CODEN = "CPEXEI",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Tue Sep 7 06:06:47 MDT 1999",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract?ID=61003667;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=61003667&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@Misc{Beguelin:19xx:PSS,
author = "A. Beguelin and J. J. Dongarra and G. A. Geist and R.
Manchek and V. S. Sunderam",
title = "{PVM} Software System and Documentation",
howpublished = "Email to {\tt netlib@ornl.gov}",
month = "????",
year = "19xx",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
Parallel/Par.Arch.Indep.bib",
}
@TechReport{Geist:19xx:NBC,
author = "G. A. Geist and V. S. Sunderam",
title = "Network Based Concurrent Computing on the {PVM}
System",
institution = inst-ORNL # " and " # inst-EMORY,
address = inst-ORNL:adr # " and " # inst-EMORY:adr,
year = "19xx",
bibsource = "Distributed/Dist.Sys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
}
@Article{Adhianto:2000:TOA,
author = "L. Adhianto and F. Bodin and B. Chapman and L. Hascoet
and A. Kneer and D. Lancaster and I. Wolton and M.
Wirtz",
title = "Tools for {OpenMP} application development: the {POST}
project",
journal = j-CPE,
volume = "12",
number = "12",
pages = "1177--1191",
month = oct,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1177::AID-CPE533>3.0.CO;2-V",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sat Apr 7 06:56:10 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/cpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500357/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500357&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@Article{Anonymous:2000:BRUd,
author = "Anonymous",
title = "Book Review: {{\booktitle{Using MPI-2: Advanced
features of the message-passing interface}}: By William
Gropp, Ewing Lusk and Rajeev Thakur. The MIT Press,
Cambridge, MA. (1999). 382 pages. \$35 (each); \$60
(set)}",
journal = j-COMPUT-MATH-APPL,
volume = "40",
number = "2--3",
pages = "419--419",
month = jul # "\slash " # aug,
year = "2000",
CODEN = "CMAPDK",
ISSN = "0898-1221 (print), 1873-7668 (electronic)",
ISSN-L = "0898-1221",
bibdate = "Wed Mar 1 21:49:10 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computmathappl2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0898122100902098",
acknowledgement = ack-nhfb,
fjournal = "Computers and Mathematics with Applications",
journal-URL = "http://www.sciencedirect.com/science/journal/08981221",
}
@Article{Anonymous:2000:BRUe,
author = "Anonymous",
title = "Book Review: {{\booktitle{Using MPI: Portable parallel
programming with the message-passing interface}}:
Second edition. By William Gropp, Ewing Lusk and
Anthony Skjellum. The MIT Press, Cambridge, MA. (1999).
371 pages. \$35 (each); \$60 (set)}",
journal = j-COMPUT-MATH-APPL,
volume = "40",
number = "2--3",
pages = "419--419",
month = jul # "\slash " # aug,
year = "2000",
CODEN = "CMAPDK",
ISSN = "0898-1221 (print), 1873-7668 (electronic)",
ISSN-L = "0898-1221",
bibdate = "Wed Mar 1 21:49:10 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computmathappl2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0898122100902074",
acknowledgement = ack-nhfb,
fjournal = "Computers and Mathematics with Applications",
journal-URL = "http://www.sciencedirect.com/science/journal/08981221",
}
@Article{Armstrong:2000:QDB,
author = "Brian Armstrong and Seon Wook Kim and Rudolf
Eigenmann",
title = "Quantifying Differences between {OpenMP} and {MPI}
Using a Large-Scale Application Suite",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1940",
pages = "482--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 09:17:15 MST 2002",
bibsource = "file://sunset.math.utah.edu/a/suncore0/export/home/0073/sy/beebe/tex/bib/lncs2000.bib;
http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400482.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1940/19400482.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Astalos:2000:CMS,
author = "J{\'a}n Astalos and Ladislav Hluch{\'y}",
title = "{CIS} --- a Monitoring System for {PC} Clusters",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "225--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080225.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080225.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Baiardi:2000:AMM,
author = "Fabrizio Baiardi and Sarah Chiti and Paolo Mori and
Laura Ricci",
title = "Adaptive Multigrid Methods in {MPI}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "80--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080080.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080080.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Berrendorf:2000:PCO,
author = "Rudolf Berrendorf and Guido Nieken",
title = "Performance characteristics for {OpenMP} constructs on
different parallel computer architectures",
journal = j-CPE,
volume = "12",
number = "12",
pages = "1261--1273",
month = oct,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1261::AID-CPE525>3.0.CO;2-5",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sat Apr 7 06:56:10 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/cpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500355/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500355&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@InProceedings{Bircsak:2000:EONa,
author = "John Bircsak and Peter Craig and RaeLyn Crowell and
Zarka Cvetanovic and Jonathan Harris and C. Alexander
Nelson and Carl D. Offner",
title = "Extending {OpenMP} for {NUMA} Machines",
crossref = "ACM:2000:SHP",
pages = "68--69",
year = "2000",
bibdate = "Mon Feb 12 12:29:55 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib",
URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap226.pdf",
acknowledgement = ack-nhfb,
}
@Article{Bircsak:2000:EONb,
author = "John Bircsak and Peter Craig and RaeLyn Crowell and
others",
title = "Extending {OpenMP} for {NUMA} machines",
journal = j-SCI-PROG,
volume = "8",
number = "3",
pages = "163--181",
year = "2000",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Thu Mar 28 08:44:35 MST 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sciprogram.bib;
OCLC Article1st database",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@Article{Bolloni:2000:TIQ,
author = "Alessandro Bolloni and Stefano Crocchianti and Antonio
Lagan{\`a}",
title = "Time Independent {$3$D} Quantum Reactive Scattering on
{MIMD} Parallel Computers",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "338--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080338.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080338.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Bolton:2000:MPL,
author = "Hermanus P. J. Bolton and Jaco F. Schutte and Albert
A. Groenwold",
title = "Multiple Parallel Local Searches in Global
Optimization",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "88--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080088.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080088.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@InProceedings{Booth:2000:SSM,
author = "S. Booth and E. Mourao",
title = "Single-sided {MPI} Implementations for {SUN MPI}",
crossref = "ACM:2000:SHP",
pages = "46--46",
year = "2000",
bibdate = "Mon Feb 12 11:57:40 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap182.pdf",
acknowledgement = ack-nhfb,
}
@Article{Bova:2000:DLP,
author = "Steve W. Bova and Clay P. Breshears and Christine E.
Cuicchi and Zeki Demirbilek and Henry A. Gabb",
title = "Dual-Level Parallel Analysis of Harbor Wave Response
Using {MPI} and {OpenMP}",
journal = j-IJHPCA,
volume = "14",
number = "1",
pages = "49--64",
month = "Spring",
year = "2000",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Sep 12 12:39:11 2000",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Brieger:2000:HOO,
author = "Leesa Brieger",
title = "{HPF} to {OpenMP} on the {Origin2000}: a case study",
journal = j-CPE,
volume = "12",
number = "12",
pages = "1147--1154",
month = oct,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1147::AID-CPE526>3.0.CO;2-Q",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sat Apr 7 06:56:10 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/cpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500351/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500351&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@Article{Brorsson:2000:SIE,
author = "Mats Brorsson and Barbara Chapman",
title = "Special Issue: {EWOMP'99 --- First European Workshop
on OpenMP}",
journal = j-CPE,
volume = "12",
number = "12",
pages = "1117--1119",
month = oct,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1117::AID-CPE543>3.0.CO;2-#",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sat Apr 7 06:56:10 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/cpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500352/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500352&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@InProceedings{Bruno:2000:PEH,
author = "G. Bruno and A. A. Chien and M. J. Katz and P. M.
Papadopoulos",
title = "Performance Enhancements for {HPVM} in Multi-Network
and Heterogeneous Hardware",
crossref = "Engquist:2000:SVG",
pages = "17--32",
year = "2000",
bibdate = "Mon Oct 23 10:53:54 2000",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Brunschen:2000:OCP,
author = "Christian Brunschen and Mats Brorsson",
title = "{OdinMP\slash CCp} --- a portable implementation of
{OpenMP} for {C}",
journal = j-CPE,
volume = "12",
number = "12",
pages = "1193--1203",
month = oct,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1193::AID-CPE527>3.0.CO;2-U",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sat Apr 7 06:56:10 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/cpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500347/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500347&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@Article{Bubak:2000:IOB,
author = "Marian Bubak and W{\l}odzimierz Funika and Bartosz Balis
and Roland Wism{\"u}ller",
title = "Interoperability of {OCM}-Based On-Line Tools",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "242--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080242.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080242.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@InProceedings{Bull:2000:JOL,
author = "J. M. Bull and M. E. Kambites",
editor = "????",
booktitle = "{Proceedings of the ACM 2000 conference on Java
Grande}",
title = "{JOMP}: an {OpenMP}-like interface for {Java}",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "44--53",
year = "2000",
bibdate = "Mon Oct 07 09:19:42 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/java2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Bull:2000:PPJ,
author = "J. Mark Bull and Mark E. Kambites and Jan Obdrzalek",
title = "Parallel Programming in {Java} with {OpenMP}-like
Directives",
crossref = "ACM:2000:SHP",
pages = "150--150",
year = "2000",
bibdate = "Sat Feb 10 14:28:55 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib",
acknowledgement = ack-nhfb,
}
@Article{Butler:2000:SPM,
author = "Ralph Butler and William Gropp and Ewing Lusk",
title = "A Scalable Process-Management Environment for Parallel
Programs",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "168--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080168.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080168.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@InCollection{Cahir:2000:PMM,
author = "Margaret Cahir and Robert Moench and Alice E.
Koniges",
title = "Programming Models and Methods",
crossref = "Koniges:2000:ISP",
chapter = "3",
pages = "27--54",
year = "2000",
bibdate = "Fri Feb 04 18:32:51 2000",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Discusses PVM, MPI, SHMEM, High-Performance Fortran,
and POSIX threads.",
acknowledgement = ack-nhfb,
}
@InProceedings{Cappello:2000:MVM,
author = "Franck Cappello and Daniel Etiemble",
title = "{MPI} versus {MPI+OpenMP} on the {IBM SP} for the {NAS
Benchmarks}",
crossref = "ACM:2000:SHP",
pages = "51--51",
year = "2000",
bibdate = "Mon Feb 12 11:57:42 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib",
URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap214.pdf",
acknowledgement = ack-nhfb,
}
@Article{Carpenter:2000:MML,
author = "Bryan Carpenter and Vladimir Getov and Glenn Judd and
Anthony Skjellum and Geoffrey Fox",
title = "{MPJ}: {MPI}-like message passing for {Java}",
journal = j-CPE,
volume = "12",
number = "11",
pages = "1019--1038",
month = sep,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200009)12:11<1019::AID-CPE518>3.0.CO;2-G",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sat Apr 7 06:56:10 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76000188/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76000188&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@Article{Carpenter:2000:OSM,
author = "Bryan Carpenter and Geoffrey Fox and Sung Hoon Ko and
Sang Lim",
title = "Object serialization for marshaling data in a {Java}
interface to {MPI}",
journal = j-CPE,
volume = "12",
number = "7",
pages = "539--553",
month = may,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200005)12:7<539::AID-CPE498>3.0.CO;2-H",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sun Oct 29 16:57:07 MST 2000",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/72516217/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=72516217&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@InProceedings{Cartwright:2000:AOE,
author = "Keith L. Cartwright and Joseph D. Blahovec",
title = "Adding {OpenMP} to an Existing {MPI} Code: Will It be
Beneficial?",
crossref = "ACM:2000:SHP",
pages = "145--145",
year = "2000",
bibdate = "Sat Feb 10 14:28:55 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Chen:2000:MCO,
author = "Hsiang Ann Chen and Yvette O. Carrasco and Amy W.
Apon",
title = "{MPI} Collective Operations over {IP} Multicast",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1800",
pages = "51--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 09:16:18 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1800.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1800/18000051.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1800/18000051.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Ciaccio:2000:GMG,
author = "Giuseppe Ciaccio and Giovanni Chiola",
title = "{GAMMA} and {MPI\slash GAMMA} on Gigabit {Ethernet}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "129--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080129.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080129.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Cotronis:2000:CMP,
author = "J. Y. Cotronis and Z. Tsiatsoulis and C. Kouniakis",
title = "Composition of Message Passing Applications
On-Demand",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "192--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080192.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080192.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Couturier:2000:PMD,
author = "Rapha{\"e}l Couturier and Christophe Chipot",
title = "Parallel molecular dynamics using {OpenMP} on a shared
memory machine",
journal = j-COMP-PHYS-COMM,
volume = "124",
number = "1",
pages = "49--59",
day = "15",
month = jan,
year = "2000",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/S0010-4655(99)00432-4",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Mon Feb 13 23:40:32 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465599004324",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Domokos:2000:PRC,
author = "G{\'a}bor Domokos and Imre Szeber{\'e}nyi and Paul H.
Steen",
title = "Parallel, Recursive Computation of Global Stability
Charts for Liquid Bridges",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "64--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080064.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080064.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Dozsa:2000:THL,
  author = {G{\'a}bor D{\'o}zsa and D{\'a}niel Dr{\'o}tos and
    R{\'o}bert Lovas},
  title = {Translation of a High-Level Graphical Code to
    Message-Passing Primitives in the {GRADE} Programming
    Environment},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {258--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080258.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080258.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@InProceedings{Eigenmann:2000:TMPa,
  author = {Rudolf Eigenmann and Tim Mattson},
  title = {Tutorial {M6A}: Parallel Programming with {OpenMP}:
    {Part I}},
  crossref = {ACM:2000:SHP},
  pages = {21--21},
  year = {2000},
  bibdate = {Sat Feb 10 14:28:55 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib},
  acknowledgement = ack-nhfb,
}
@InProceedings{Eigenmann:2000:TMPb,
  author = {Rudolf Eigenmann and Tim Mattson},
  title = {Tutorial {M6B}: Parallel Programming with {OpenMP}:
    {Part II}},
  crossref = {ACM:2000:SHP},
  pages = {23--23},
  year = {2000},
  bibdate = {Sat Feb 10 14:28:55 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib},
  acknowledgement = ack-nhfb,
}
@Article{Espinosa:2000:APA,
  author = {Antonio Espinosa and Tomas Margalef and Emilio Luque},
  title = {Automatic Performance Analysis of Master\slash Worker
    {PVM} Applications with {Kpi}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {47--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080047.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080047.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Fagg:2000:AAC,
  author = {Graham E. Fagg and Sathish S. Vadhiyar and Jack J.
    Dongarra},
  title = {{ACCT}: {Automatic Collective Communications Tuning}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {354--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080354.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080354.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Fagg:2000:FMF,
  author = {Graham E. Fagg and Jack J. Dongarra},
  title = {{FT-MPI}: {Fault Tolerant MPI}, Supporting Dynamic
    Applications in a Dynamic World},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {346--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080346.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080346.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Fahringer:2000:FOP,
  author = {Thomas Fahringer and Michael Gerndt and Graham Riley
    and Jesper Larsson Tr{\"a}ff},
  title = {Formalizing {OpenMP} Performance Properties with
    {ASL}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1940},
  pages = {428--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 09:17:15 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
    http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400428.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1940/19400428.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Fernandez:2000:DCE,
  author = {Francisco Fern{\'a}ndez and Marco Tomassini and
    Leonardo Vanneschi and Laurent Bucher},
  title = {A Distributed Computing Environment for Genetic
    Programming Using {MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {322--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080322.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080322.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Fernandez:2000:UPM,
  author = {Gustavo J. Fern{\'a}ndez and Julio Jacobo-Berlles and
    Patricia Borensztejn and Marisa Bauz{\'a} and Marta
    Mejail},
  title = {Use of {PVM} for {MAP} Image Restoration: a Parallel
    Implementation of the {ARTUR} Algorithm},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {113--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080113.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080113.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Fink:2000:IMC,
  author = {Torsten Fink},
  title = {Integrating {MPI} Components into Metacomputing
    Applications},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {208--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080208.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080208.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Geist:2000:PMW,
  author = {Al Geist},
  title = {{PVM} and {MPI}: What Else Is Needed for Cluster
    Computing?},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {1--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080001.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080001.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Girona:2000:VDC,
  author = {Sergi Girona and Jes{\'u}s Labarta and Rosa M. Badia},
  title = {Validation of Dimemas Communication Model for {MPI}
    Collective Operations},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {39--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080039.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080039.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@InProceedings{Gonzalez:2000:AIT,
  author = {M. Gonzalez and A. Serra and X. Martorell and J.
    Oliver and E. Ayguade and J. Labarta and N. Navarro},
  editor = {????},
  booktitle = {{Proceedings 14th International Parallel and
    Distributed Processing Symposium. IPDPS 2000}},
  title = {Applying interposition techniques for performance
    analysis of {OpenMP} parallel applications},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {235--240},
  year = {2000},
  bibdate = {Mon Oct 07 09:07:07 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Article{Gonzalez:2000:NSF,
  author = {Marc Gonz{\`a}lez and Eduard Ayguad{\'e} and Xavier
    Martorell and Jes{\'u}s Labarta and Nacho Navarro and
    Jos{\'e} Oliver},
  title = {{NanosCompiler}: supporting flexible multilevel
    parallelism exploitation in {OpenMP}},
  journal = j-CPE,
  volume = {12},
  number = {12},
  pages = {1205--1218},
  month = oct,
  year = {2000},
  CODEN = {CPEXEI},
  DOI = {https://doi.org/10.1002/1096-9128(200010)12:12<1205::AID-CPE524>3.0.CO;2-2},
  ISSN = {1040-3108},
  ISSN-L = {1040-3108},
  bibdate = {Sat Apr 7 06:56:10 MDT 2001},
  bibsource = {http://www.interscience.wiley.com/jpages/1040-3108;
    http://www.math.utah.edu/pub/tex/bib/cpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www3.interscience.wiley.com/journalfinder.html},
  URL = {http://www3.interscience.wiley.com/cgi-bin/abstract/76500358/START;
    http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500358&PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency, practice and experience},
}
@Article{Gonzalez:2000:PAM,
  author = {Daniel Gonz{\'a}lez and Francisco Almeida and Luz
    Marina Moreno and Casiano Rodr{\'\i}guez},
  title = {Pipeline Algorithms on {MPI}: Optimal Mapping of the
    Path Planing Problem},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {104--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080104.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080104.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Gonzalez:2000:TSN,
  author = {J. A. Gonz{\'a}lez and C. Le{\'o}n and F. Piccoli and
    M. Printista and J. L. Roda and C. Rodr{\'\i}guez and
    F. Sande},
  title = {Towards Standard Nested Parallelism},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {96--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080096.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080096.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Gropp:2000:RCD,
  author = {William D. Gropp},
  title = {Runtime Checking of Datatype Signatures in {MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {160--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080160.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080160.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@InProceedings{Gropp:2000:TSU,
  author = {William Gropp and Ewing (Rusty) Lusk and Rajeev S.
    Thakur},
  title = {Tutorial {S1}: Using {MPI-2}: a Tutorial on Advanced
    Features of the Message-Passing Interface},
  crossref = {ACM:2000:SHP},
  pages = {11--11},
  year = {2000},
  bibdate = {Sat Feb 10 14:28:55 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Book{Gropp:2000:UMA,
  author = {William Gropp and Ewing Lusk and Rajeev Thakur},
  title = {Using {MPI-2}: Advanced Features of the {Message
    Passing Interface}},
  publisher = pub-MIT,
  address = pub-MIT:adr,
  pages = {xxi + 382},
  year = {2000},
  ISBN = {0-262-57133-1},
  ISBN-13 = {978-0-262-57133-3},
  LCCN = {QA76.642 .G762 1999},
  bibdate = {Wed Aug 27 06:19:05 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = {Scientific and engineering computation},
  acknowledgement = ack-nhfb,
}
@InProceedings{He:2000:PAA,
  author = {Yun (Helen) He and Chris H. Q. Ding},
  title = {Platforms: An Accurate Arithmetics Approach},
  crossref = {ACM:2000:SHP},
  pages = {150--150},
  year = {2000},
  bibdate = {Sat Feb 10 14:28:55 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib},
  abstract = {Numerical reproducibility of large-scale scientific
    simulations, especially climate modeling, on
    distributed memory parallel computers are becoming
    critical issues. In particular, global summation and
    dot products of distributed arrays are very susceptible
    to rounding errors. We analyzed several accurate
    summation methods and found that two methods are
    particularly effective to improve (ensure)
    reproducibility: Kahan's self-compensated summation and
    Bailey's double-double precision summation. We provide
    an MPI operator MPI\_SUMDD to work with MPI collective
    operations to ensure a scalable implementation on large
    number of processors. The final methods are
    particularly simple to adopt in practical codes.},
  acknowledgement = ack-nhfb,
  keywords = {floating-point arithmetic; rounding errors},
}
@InProceedings{He:2000:UAA,
author = "Yun He and Chris H. Q. Ding",
title = "Using accurate arithmetics to improve numerical
reproducibility and stability in parallel
applications",
crossref = "Reynders:2000:IPI",
pages = "225--234",
year = "2000",
DOI = "https://doi.org/10.1145/335231.335253",
bibdate = "Sat Feb 8 18:35:50 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://dl.acm.org/doi/abs/10.1145/335231.335253",
abstract = "Numerical reproducibility and stability of large scale
scientific simulations, especially climate modeling, on
distributed memory parallel computers are becoming
critical issues. In particular, global summation of
distributed arrays is most susceptible to rounding
errors, and their propagation and accumulation cause
uncertainty in final simulation results. We analyzed
several accurate summation methods and found that two
methods are particularly effective to improve (ensure)
reproducibility and stability: Kahan's self-compensated
summation and Bailey's double-double precision
summation. We provide an MPI operator MPI\_SUMDD to work
with MPI collective operations to ensure a scalable
implementation on large number of processors. The final
methods are particularly simple to adopt in practical
codes.",
acknowledgement = ack-nhfb,
}
@Article{Hisley:2000:PPE,
  author = {Dixie Hisley and Gagan Agrawal and Punyam
    Satya-narayana and Lori Pollock},
  title = {Porting and performance evaluation of irregular codes
    using {OpenMP}},
  journal = j-CPE,
  volume = {12},
  number = {12},
  pages = {1241--1259},
  month = oct,
  year = {2000},
  CODEN = {CPEXEI},
  DOI = {https://doi.org/10.1002/1096-9128(200010)12:12<1241::AID-CPE523>3.0.CO;2-D},
  ISSN = {1040-3108},
  ISSN-L = {1040-3108},
  bibdate = {Sat Apr 7 06:56:10 MDT 2001},
  bibsource = {http://www.interscience.wiley.com/jpages/1040-3108;
    http://www.math.utah.edu/pub/tex/bib/cpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www3.interscience.wiley.com/journalfinder.html},
  URL = {http://www3.interscience.wiley.com/cgi-bin/abstract/76500349/START;
    http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500349&PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency, practice and experience},
}
@Article{Hu:2000:ONS,
  author = {Y. Charlie Hu and Honghui Lu and Alan L. Cox and Willy
    Zwaenepoel},
  title = {{OpenMP} for Networks of {SMPs}},
  journal = j-J-PAR-DIST-COMP,
  volume = {60},
  number = {12},
  pages = {1512--1530},
  day = {1},
  month = dec,
  year = {2000},
  CODEN = {JPDCER},
  DOI = {https://doi.org/10.1006/jpdc.2000.1658},
  ISSN = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L = {0743-7315},
  bibdate = {Tue Jul 17 08:06:43 MDT 2001},
  bibsource = {http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
    http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1658;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1658/pdf;
    http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1658/ref},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Huse:2000:MOS,
  author = {Lars Paul Huse},
  title = {{MPI} Optimization for {SMP} Based Clusters
    Interconnected with {SCI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {56--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080056.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080056.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Imamura:2000:ASM,
  author = {Toshiyuki Imamura and Yuichi Tsujita and Hiroshi Koide
    and Hiroshi Takemiya},
  title = {An Architecture of {Stampi}: {MPI} Library on a
    Cluster of Parallel Computers},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {200--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080200.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080200.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Ishizaka:2000:CGT,
  author = {Kazuhisa Ishizaka and Motoki Obata and Hironori
    Kasahara},
  title = {Coarse-Grain Task Parallel Processing Using the
    {OpenMP} Backend of the {OSCAR} Multigrain
    Parallelizing Compiler},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1940},
  pages = {457--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 09:17:15 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
    http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400457.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1940/19400457.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Iskra:2000:IDE,
author = "K. A. Iskra and F. van der Linden and Z. W. Hendrikse
and B. J. Overeinder and G. D. van Albada and P. M. A.
Sloot",
title = "The implementation of {Dynamite}: an environment for
migrating {PVM} tasks",
journal = j-OPER-SYS-REV,
volume = "34",
number = "3",
pages = "40--55",
month = jul,
year = "2000",
CODEN = "OSRED8",
ISSN = "0163-5980 (print), 1943-586X (electronic)",
ISSN-L = "0163-5980",
bibdate = "Sat Aug 26 08:55:47 MDT 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Operating Systems Review",
}
@Article{Iskra:2000:PMD,
  author = {K. A. Iskra and Z. W. Hendrikse and G. D. van Albada
    and B. J. Overeinder and P. M. A. Sloot},
  title = {Performance Measurements on {Dynamite\slash DPVM}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {27--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080027.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080027.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Jin:2000:AGO,
  author = {Haoqiang Jin and Michael Frumkin and Jerry Yan},
  title = {Automatic Generation of {OpenMP} Directives and Its
    Application to Computational Fluid Dynamics Codes},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1940},
  pages = {440--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 09:17:15 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
    http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400440.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1940/19400440.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Kuhn:2000:OVT,
  author = {Bob Kuhn and Paul Petersen and Eamonn O'Toole},
  title = {{OpenMP} versus threading in {C\slash C++}},
  journal = j-CPE,
  volume = {12},
  number = {12},
  pages = {1165--1176},
  month = oct,
  year = {2000},
  CODEN = {CPEXEI},
  DOI = {https://doi.org/10.1002/1096-9128(200010)12:12<1165::AID-CPE529>3.0.CO;2-L},
  ISSN = {1040-3108},
  ISSN-L = {1040-3108},
  bibdate = {Sat Apr 7 06:56:10 MDT 2001},
  bibsource = {http://www.interscience.wiley.com/jpages/1040-3108;
    http://www.math.utah.edu/pub/tex/bib/cpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www3.interscience.wiley.com/journalfinder.html},
  URL = {http://www3.interscience.wiley.com/cgi-bin/abstract/76500354/START;
    http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500354&PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency, practice and experience},
}
@Article{Kusano:2000:PEO,
  author = {Kazuhiro Kusano and Shigehisa Satoh and Mitsuhisa
    Sato},
  title = {Performance Evaluation of the Omni {OpenMP} Compiler},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1940},
  pages = {403--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 09:17:15 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
    http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400403.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1940/19400403.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Landman:2000:PLR,
  author = {Joseph Landman and Piotr Piecuch},
  title = {Parallelization of a legacy research program using
    {OpenMP}},
  journal = j-FORTRAN-FORUM,
  volume = {19},
  number = {2},
  pages = {16--23},
  month = aug,
  year = {2000},
  CODEN = {????},
  ISSN = {1061-7264 (print), 1931-1311 (electronic)},
  ISSN-L = {1061-7264},
  bibdate = {Wed Feb 6 18:50:08 MST 2002},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/fortran-forum.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Fortran Forum},
}
@Article{Laohawee:2000:PDT,
  author = {P. Laohawee and A. Tangpong and A. Rungsawang},
  title = {Parallel {DSIR} Text Indexing System: Using Multiple
    Master\slash Slave Concept},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {297--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080297.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080297.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Lassous:2000:HGA,
  author = {Isabelle Gu{\'e}rin Lassous and Jens Gustedt and
    Michel Morvan},
  title = {Handling Graphs According to a Coarse Grained
    Approach: Experiments with {PVM} and {MPI}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {72--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080072.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080072.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Livny:2000:MYW,
  author = {Miron Livny},
  title = {Managing Your Workforce on a Computational Grid},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {3--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080003.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080003.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Louca:2000:MFP,
author = "S. Louca and N. Neophytou and A. Lachanas and P.
Evripidou",
title = "{MPI-FT}: Portable Fault Tolerance Scheme for {MPI}",
journal = j-PARALLEL-PROCESS-LETT,
volume = "10",
number = "4",
pages = "371--??",
month = dec,
year = "2000",
CODEN = "PPLTEE",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
ISSN-L = "0129-6264",
bibdate = "Wed Jul 25 16:34:42 2001",
bibsource = "http://ejournals.wspc.com.sg/ppl/ppl.shtml;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://ejournals.wspc.com.sg/ppl/10/1004/S0129626400000342.html",
acknowledgement = ack-nhfb,
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@Article{Lusk:2000:IIC,
  author = {Ewing Lusk},
  title = {Isolating and Interfacing the Components of a Parallel
    Computing Environment},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {5--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080005.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080005.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@InProceedings{Mattson:2000:BOF,
  author = {Tim Mattson},
  title = {{BOF}: {OpenMP} and its Future Developments},
  crossref = {ACM:2000:SHP},
  pages = {106--106},
  year = {2000},
  bibdate = {Sat Feb 10 14:28:55 2001},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/supercomputing2000.bib},
  acknowledgement = ack-nhfb,
}
@Article{Mattson:2000:IO,
  author = {Timothy G. Mattson},
  title = {An Introduction to {OpenMP 2.0}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1940},
  pages = {384--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 09:17:15 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
    http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400384.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1940/19400384.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Mazzocca:2000:TPP,
  author = {N. Mazzocca and M. Rak and U. Villano},
  title = {The Transition from a {PVM} Program Simulator to a
    Heterogeneous System Simulator: The {HeSSE} Project},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {266--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080266.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080266.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{McDonald:2000:TPA,
author = "Chris McDonald and Kamran Kazemi",
title = "Teaching parallel algorithms with process topologies",
journal = j-SIGCSE,
volume = "32",
number = "1",
pages = "70--74",
month = mar,
year = "2000",
CODEN = "SIGSD3",
DOI = "https://doi.org/10.1145/331795.331816",
ISSN = "0097-8418 (print), 2331-3927 (electronic)",
ISSN-L = "0097-8418",
bibdate = "Mon Nov 19 10:05:03 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigcse2000.bib",
abstract = "Parallel algorithms are often introduced to students
by describing the geometric topologies formed by
communicating processes and often the geographic
relationships between them. However, the two most
common message passing environments used in teaching,
PVM and MPI, each provide only rudimentary support for
the specification and execution of process topologies.
There is a strong need for better syntactic and
semantic support for process topologies in these
environments, so that students may concentrate on the
algorithms being studied, and not have to wrestle with
the environments' infrastructure. This paper first
motivates, and then describes the use of additional
support within PVM and MPI which addresses this need.",
acknowledgement = ack-nhfb,
fjournal = "SIGCSE Bulletin (ACM Special Interest Group on
Computer Science Education)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J688",
}
@Article{Mierendorff:2000:WMB,
  author = {Hermann Mierendorff and Kl{\"a}re Cassirer and Helmut
    Schwamborn},
  title = {Working with {MPI} Benchmarking Suites on {ccNUMA}
    Architectures},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {1908},
  pages = {18--??},
  year = {2000},
  CODEN = {LNCSD9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Fri Feb 1 08:30:27 MST 2002},
  bibsource = {http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080018.htm;
    http://link.springer-ny.com/link/service/series/0558/papers/1908/19080018.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Migliardi:2000:SFT,
author = "Mauro Migliardi and Vaidy Sunderam and Arrigo
Frisiani",
title = "A Simple, Fault Tolerant Naming Space for the
{HARNESS} Metacomputing System",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "152--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080152.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080152.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Mourao:2000:SSC,
author = "Elson Mour{\~a}o and Stephen Booth",
title = "Single Sided Communications in Multi-protocol {MPI}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "176--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080176.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080176.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Neyman:2000:CDA,
author = "Marcin Neyman",
title = "Comparison of Different Approaches to Trace {PVM}
Program Execution",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "274--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080274.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080274.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@InProceedings{Nikolopoulos:2000:DDN,
author = "Dimitrios S. Nikolopoulos and Theodore S.
Papatheodorou and Constantine D. Polychronopoulos and
Jes{\'u}s Labarta and Eduard Ayguad{\'e}",
title = "Is Data Distribution Necessary in {OpenMP}?",
crossref = "ACM:2000:SHP",
pages = "68--68",
year = "2000",
bibdate = "Mon Feb 12 11:57:45 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap192.pdf",
acknowledgement = ack-nhfb,
}
@Article{Nikolopoulos:2000:LTD,
author = "Dimitrios S. Nikolopoulos and Theodore S.
Papatheodorou and Constantine D. Polychronopoulos and
Jes{\'u}s Labarta and Eduard Ayguad{\'e}",
title = "Leveraging Transparent Data Distribution in {OpenMP}
via User-Level Dynamic Page Migration",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1940",
pages = "415--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 09:17:15 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400415.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1940/19400415.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Nikolopoulos:2000:TRD,
author = "Dimitrios S. Nikolopoulos and Theodore S.
Papatheodorou and Constantine D. Polychronopoulos and
others",
title = "A transparent runtime data distribution engine for
{OpenMP}",
journal = j-SCI-PROG,
volume = "8",
number = "3",
pages = "143--162",
year = "2000",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Thu Mar 28 08:44:35 MST 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sciprogram.bib;
OCLC Article1st database",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@Article{Nikolopoulos:2000:ULR,
author = "Dimitrios S. Nikolopoulos and Theodore S.
Papatheodorou and Constantine D. Polychronopoulos and
Jes{\'u}s Labarta and Eduard Ayguad{\'e}",
title = "{UPM LIB}: a Runtime System for Tuning the Memory
Performance of {OpenMP} Programs on Scalable
Shared-Memory Multiprocessors",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1915",
pages = "85--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Tue Sep 10 19:08:51 MDT 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1915.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1915/19150085.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1915/19150085.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Nishitani:2000:IEO,
author = "Yasunori Nishitani and Kiyoshi Negishi and Hiroshi
Ohta and Eiji Nunohiro",
title = "Implementation and Evaluation of {OpenMP} for {Hitachi
SR8000}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1940",
pages = "391--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 09:17:15 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400391.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1940/19400391.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Nitsche:2000:TCM,
author = "Thomas Nitsche",
title = "Thread Communication over {MPI}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "145--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080145.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080145.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@InProceedings{Ong:2000:PCL,
author = "Hong Ong and Paul A. Farrell",
title = "Performance Comparison of {LAM\slash MPI}, {MPICH},
and {MVICH} on a {Linux} Cluster Connected by a
{Gigabit Ethernet} Network",
crossref = "USENIX:2000:PAL",
pages = "??--??",
year = "2000",
bibdate = "Wed Oct 16 05:17:16 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.usenix.org/publications/library/proceedings/als2000/ong.html",
acknowledgement = ack-nhfb,
}
@Article{Orlando:2000:MDT,
author = "S. Orlando and P. Palmerini and R. Perego",
title = "Mixed data and task parallelism with {HPF} and {PVM}",
journal = "Cluster Computing",
volume = "3",
number = "3",
publisher = "Kluwer Academic Publishers, Boston, U.S.A",
pages = "201--213",
year = "2000",
CODEN = "????",
ISSN = "1386-7857 (print), 1573-7543 (electronic)",
ISSN-L = "1386-7857",
bibdate = "Sat Dec 7 09:42:43 MST 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib; Ingenta
database",
acknowledgement = ack-nhfb,
fjournal = "Cluster Computing",
pagecount = "13",
}
@Article{Payrits:2000:UPC,
author = "Szabolcs Payrits and Zolt{\'a}n Szatm{\'a}ry and
L{\'a}szl{\'o} Zal{\'a}nyi and P{\'e}ter {\'E}rdi",
title = "Use of Parallel Computers in Neurocomputing",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "313--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080313.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080313.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Pedroso:2000:MPC,
author = "Hern{\^a}ni Pedroso and Jo{\~a}o Gabriel Silva",
title = "{MPI-2} Process Creation \& Management Implementation
for {NT} Clusters",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "184--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080184.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080184.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@TechReport{Petcu:2000:PDAa,
author = "Dana Petcu",
title = "{PVMaple}: a Distributed Approach to Cooperative Work
of {Maple} Processes",
type = "Technical report",
institution = "Western University of Timisoara",
address = "Timisoara, Romania",
month = may,
year = "2000",
bibdate = "Wed Dec 17 18:08:30 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.risc.uni-linz.ac.at/software/distmaple/index_1.html",
URL = "http://www.risc.uni-linz.ac.at/software/distmaple/misc/PVMaple.ps.gz",
acknowledgement = ack-nhfb,
keywords = "Distributed Maple; PVMaple",
}
@Article{Petcu:2000:PDAb,
author = "Dana Petcu",
title = "{PVMaple}: a Distributed Approach to Cooperative Work
of {Maple} Processes",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "216--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080216.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080216.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Plazek:2000:SCC,
author = "Joanna P{\l}azek and Krzysztof Banas and Jacek
Kitowski",
title = "Scalable {CFD} Computations Using Message-Passing and
Distributed Shared Memory Algorithms",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "282--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080282.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080282.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Protopopov:2000:SMC,
author = "Boris V. Protopopov and Anthony Skjellum",
title = "Shared-memory communication approaches for an {MPI}
message-passing library",
journal = j-CPE,
volume = "12",
number = "9",
pages = "799--820",
day = "10",
month = aug,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(20000810)12:9<799::AID-CPE476>3.0.CO;2-1",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sun Oct 29 16:57:07 MST 2000",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/72516482/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=72516482&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@Article{Quoy:2000:PNN,
author = "Mathias Quoy and Sorin Moga and Philippe Gaussier and
Arnaud Revel",
title = "Parallelization of Neural Networks Using {PVM}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "289--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080289.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080289.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Rabaea:2000:EPM,
author = "Adrian Rabaea and Monica Rabaea",
title = "Experiments with Parallel {Monte Carlo} Simulation for
Pricing Options Using {PVM}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "330--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080330.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080330.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Reussner:2000:BMD,
author = "Ralf Reussner and Jesper Larsson Tr{\"a}ff and Gunnar
Hunzelmann",
title = "A Benchmark for {MPI} Derived Datatypes",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "10--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080010.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080010.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@MastersThesis{Rohrl:2000:PPS,
author = "Armin R{\"o}hrl",
title = "Parallel processing in statistical computation: {BSP},
{FPGAs} and {MPI} for the {S}-language",
type = "Th{\`e}se sciences",
school = "EPF Lausanne",
address = "Lausanne, Switzerland",
pages = "137",
year = "2000",
bibdate = "Wed Aug 27 07:24:45 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Roy:2000:MGQ,
author = "Alain J. Roy and Ian Foster and William Gropp and
Nicholas Karonis and Volker Sander and Brian Toonen",
title = "{MPICH-GQ}: Quality-of-Service for Message Passing
Programs",
crossref = "ACM:2000:SHP",
pages = "54--54",
year = "2000",
bibdate = "Mon Feb 12 11:57:43 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap234.pdf",
acknowledgement = ack-nhfb,
}
@Article{Scherer:2000:APO,
author = "Alex Scherer and Thomas Gross and Willy Zwaenepoel",
title = "Adaptive Parallelism for {OpenMP} Task Parallel
Programs",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1915",
pages = "113--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Tue Sep 10 19:08:51 MDT 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1915.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1915/19150113.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1915/19150113.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Shah:2000:FCS,
author = "Sanjiv Shah and Grant Haab and Paul Petersen and Joe
Throop",
title = "Flexible control structures for parallelism in
{OpenMP}",
journal = j-CPE,
volume = "12",
number = "12",
pages = "1219--1239",
month = oct,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1219::AID-CPE530>3.0.CO;2-0",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sat Apr 7 06:56:10 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/cpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500348/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500348&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@Article{Shyu:2000:APV,
author = "Shyong-Jian Shyu and B. M. T. Lin",
title = "An application of parallel virtual machine framework
to film production problem",
journal = j-COMPUT-MATH-APPL,
volume = "39",
number = "12",
pages = "53--62",
month = jun,
year = "2000",
CODEN = "CMAPDK",
ISSN = "0898-1221 (print), 1873-7668 (electronic)",
ISSN-L = "0898-1221",
bibdate = "Wed Mar 1 21:49:06 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computmathappl2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0898122100001292",
acknowledgement = ack-nhfb,
fjournal = "Computers and Mathematics with Applications",
journal-URL = "http://www.sciencedirect.com/science/journal/08981221",
}
@Article{Silva:2000:HPC,
author = "Lu{\'\i}s Moura Silva and Paulo Martins and Jo{\~a}o
Gabriel Silva",
title = "Heterogeneous parallel computing using {Java} and
{WMPI}",
journal = j-CPE,
volume = "12",
number = "11",
pages = "1077--1091",
month = sep,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200009)12:11<1077::AID-CPE521>3.0.CO;2-#",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sat Apr 7 06:56:10 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76000189/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76000189&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@Article{Smith:2000:DPM,
author = "Lorna Smith and Paul Kent",
title = "Development and performance of a mixed {OpenMP\slash
MPI} quantum {Monte Carlo} code",
journal = j-CPE,
volume = "12",
number = "12",
pages = "1121--1129",
month = oct,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1121::AID-CPE531>3.0.CO;2-N",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sat Apr 7 06:56:10 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500350/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500350&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@Article{Solsona:2000:MCM,
author = "Francesc Solsona and Francesc Gin{\'e} and Josep
L{\'e}rida and Porfidio Hern{\'a}ndez and Emilio
Luque",
title = "{Monito}: a Communication Monitoring Tool for a
{PVM--Linux} Environment",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "233--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080233.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080233.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Sosa:2000:IQC,
author = "C. P. Sosa and G. Scalmani and R. Gomperts and M. J.
Frisch",
title = "Ab initio quantum chemistry on a {ccNUMA} architecture
using {OpenMP}. {III}",
journal = j-PARALLEL-COMPUTING,
volume = "26",
number = "7--8",
pages = "843--856",
month = jul,
year = "2000",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Sat Oct 28 17:44:32 MDT 2000",
bibsource = "http://www.elsevier.com/locate/issn/01678191;
http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.elsevier.nl/gej-ng/10/35/21/42/29/25/abstract.html;
http://www.elsevier.nl/gej-ng/10/35/21/42/29/25/article.pdf",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Sterling:2000:SCB,
author = "Thomas Sterling",
title = "Symbolic Computing with {Beowulf}-Class {PC}
Clusters",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "7--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080007.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080007.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Suppi:2000:IOP,
author = "Remo Suppi and Fernando Cores and Emilio Luque",
title = "Improving Optimistic {PDES} in {PVM} Environments",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "304--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080304.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080304.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Tanaka:2000:PEO,
author = "Yoshizumi Tanaka and Kenjiro Taura and Mitsuhisa Sato
and Akinori Yonezawa",
title = "Performance Evaluation of {OpenMP} Applications with
Nested Parallelism",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1915",
pages = "100--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Tue Sep 10 19:08:51 MDT 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1915.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1915/19150100.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1915/19150100.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Tang:2000:PTR,
author = "Hong Tang and Kai Shen and Tao Yang",
title = "Program transformation and runtime support for
threaded {MPI} execution on shared-memory machines",
journal = j-TOPLAS,
volume = "22",
number = "4",
pages = "673--700",
year = "2000",
CODEN = "ATPSDT",
ISSN = "0164-0925 (print), 1558-4593 (electronic)",
ISSN-L = "0164-0925",
bibdate = "Tue Apr 17 10:05:24 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.acm.org/pubs/citations/journals/toplas/2000-22-4/p673-tang/",
abstract = "Parallel programs written in MPI have been widely used
for developing high-performance applications on various
platforms. Because of a restriction of the MPI
computation model, conventional MPI implementations on
shared-memory machines map each MPI node to an OS
process, which can suffer serious performance
degradation in the presence of multiprogramming. This
paper studies compile-time and runtime techniques for
enhancing performance portability of MPI code running
on multiprogrammed shared-memory machines. The proposed
techniques allow MPI nodes to be executed safely and
efficiently as threads. Compile-time transformation
eliminates global and static variables in C code using
node-specific data. The runtime support includes an
efficient and provably correct communication protocol
that uses lock-free data structure and takes advantage
of address space sharing among threads. The experiments
on SGI Origin 2000 show that our MPI prototype called
TMPI using the proposed techniques is competitive with
SGI's native MPI implementation in a dedicated
environment, and that it has significant performance
advantages in a multiprogrammed environment.",
acknowledgement = ack-nhfb,
fjournal = "ACM Transactions on Programming Languages and
Systems",
generalterms = "Algorithms; Design; Experimentation; Languages;
Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J783",
keywords = "lock-free synchronization; MPI; multiprogrammed
environments; program transformation; shared-memory
machines; threaded execution",
subject = "Hardware --- Memory Structures --- Design Styles
(B.3.2): {\bf Shared memory}; Software --- Programming
Techniques --- Concurrent Programming (D.1.3): {\bf
Parallel programming}; Software --- Programming
Languages --- Language Classifications (D.3.2): {\bf
Concurrent, distributed, and parallel languages};
Software --- Programming Languages --- Processors
(D.3.4): {\bf Preprocessors}; Software --- Programming
Languages --- Processors (D.3.4): {\bf Run-time
environments}; Software --- Operating Systems ---
Process Management (D.4.1): {\bf
Multiprocessing/multiprogramming/multitasking}; Data
--- Data Structures (E.1): {\bf Lists, stacks, and
queues}",
}
@Article{Tatebe:2000:IOO,
author = "Osamu Tatebe and Mitsuhisa Sato and Satoshi
Sekiguchi",
title = "Impact of {OpenMP} Optimizations for the {MGCG}
Method",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1940",
pages = "471--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 09:17:15 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1940.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1940/19400471.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1940/19400471.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Tavora:2000:DCM,
author = "V{\'\i}tor N. T{\'a}vora and Lu{\'\i}s M. Silva and
Jo{\~a}o Gabriel Silva",
title = "Distributed Checkpointing Mechanism for a Parallel
File System",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "137--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080137.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080137.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Thiruvathukal:2000:JNW,
author = "George K. Thiruvathukal and Phillip M. Dickens and
Shahzad Bhatti",
title = "{Java} on networks of workstations {(JavaNOW)}: a
parallel computing framework inspired by {Linda} and
the {Message Passing Interface (MPI)}",
journal = j-CPE,
volume = "12",
number = "11",
pages = "1093--1116",
month = sep,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200009)12:11<1093::AID-CPE522>3.0.CO;2-6",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sat Apr 7 06:56:10 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76000187/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76000187&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@Article{Tourancheau:2000:HSN,
author = "Bernard Tourancheau",
title = "High Speed Networks for Clusters, the {BIP-Myrinet}
Experience",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "9--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080009.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080009.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@InProceedings{Traff:2000:IMO,
author = "Jesper Larsson Tr{\"a}ff and Hubert Ritzdorf and Rolf
Hempel",
title = "The Implementation of {MPI-2} One-Sided Communication
for the {NEC SX-5}",
crossref = "ACM:2000:SHP",
pages = "45--46",
year = "2000",
bibdate = "Mon Feb 12 11:57:32 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap181.pdf",
acknowledgement = ack-nhfb,
}
@Article{Tran:2000:PPM,
author = "Viet D. Tran and Ladislav Hluchy and Giang T. Nguyen",
title = "Parallel Program Model for Distributed Systems",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "250--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080250.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080250.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{VanVoorst:2000:CMI,
author = "Brian {Van Voorst} and Steven Seidel",
title = "Comparison of {MPI} Implementations on a Shared Memory
Machine",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1800",
pages = "847--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 09:16:18 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1800.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1800/18000847.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1800/18000847.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Varin:2000:PAL,
author = "E. Varin and R. Roy and G. Samba",
title = "Parallel Algorithms for the Least-Squares Finite
Element Solution of the Neutron Transport Equation",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1908",
pages = "121--??",
year = "2000",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:30:27 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1908.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1908/19080121.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1908/19080121.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@InProceedings{Vetter:2000:DST,
author = "Jeffrey S. Vetter and Bronis R. de Supinski",
title = "Dynamic Software Testing of {MPI} Applications with
{Umpire}",
crossref = "ACM:2000:SHP",
pages = "70--70",
year = "2000",
bibdate = "Mon Feb 12 11:57:45 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sc2000.org/proceedings/techpapr/papers/pap208.pdf",
acknowledgement = ack-nhfb,
}
@TechReport{VidalMacia:2000:IPM,
author = "Antonio {Vidal Maci{\'a}} and Jos{\'e} Luis {P{\'e}rez
G{\'o}mez}",
title = "Introducci{\'o}n a la programaci{\'o}n en {MPI}.
({Spanish}) [{Introduction} to programming in {MPI}]",
type = "Technical report",
number = "{SPUPV-2000.209}",
institution = "Departamento de Sistemas Inform{\'a}ticos y
Computaci{\'o}n, Facultad de Inform{\'a}tica,
Universidad Polit{\'e}cnica de Valencia, Servicio de
Publicaciones",
address = "Valencia, Spain",
pages = "78",
year = "2000",
bibdate = "Wed Aug 27 06:35:39 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
language = "Spanish",
}
@Article{Wallcraft:2000:SOV,
author = "Alan J. Wallcraft",
title = "{SPMD} {OpenMP} versus {MPI} for ocean models",
journal = j-CPE,
volume = "12",
number = "12",
pages = "1155--1164",
month = oct,
year = "2000",
CODEN = "CPEXEI",
DOI = "https://doi.org/10.1002/1096-9128(200010)12:12<1155::AID-CPE532>3.0.CO;2-5",
ISSN = "1040-3108",
ISSN-L = "1040-3108",
bibdate = "Sat Apr 7 06:56:10 MDT 2001",
bibsource = "http://www.interscience.wiley.com/jpages/1040-3108;
http://www.math.utah.edu/pub/tex/bib/cpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
URL = "http://www3.interscience.wiley.com/cgi-bin/abstract/76500353/START;
http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=76500353&PLACEBO=IE.pdf",
acknowledgement = ack-nhfb,
fjournal = "Concurrency, practice and experience",
}
@Article{Addison:2001:EOP,
author = "Cliff Addison",
title = "Exploiting {OpenMP} to Provide Scalable {SMP BLAS} and
{LAPACK} Routines",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2073",
pages = "3--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:04:28 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2073.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2073/20730003.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2073/20730003.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Al-Tawil:2001:PME,
author = "Khalid Al-Tawil and Csaba Andras Moritz",
title = "Performance Modeling and Evaluation of {MPI}",
journal = j-J-PAR-DIST-COMP,
volume = "61",
number = "2",
pages = "202--223",
day = "1",
month = feb,
year = "2001",
CODEN = "JPDCER",
DOI = "https://doi.org/10.1006/jpdc.2000.1677",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Fri Feb 22 15:30:36 MST 2002",
bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1677;
http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1677/pdf;
http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1677/ref",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{AlHaddad:2001:UNW,
author = "Mohammed {Al Haddad} and Jerome Robinson",
title = "Using a Network of Workstations to Enhance Database
Query Processing Performance",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "352--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310352.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310352.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Allsopp:2001:EUM,
author = "Nicholas K. Allsopp and John F. Hague and Jean-Pierre
Prost",
title = "Experiences in Using {MPI--IO} on Top of {GPFS} for
the {IFS} Weather Forecast Code",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2150",
pages = "380--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:05:53 MST 2002",
bibsource = "file://sunset.math.utah.edu/a/suncore0/export/home/0073/sy/beebe/tex/bib/lncs2001c.bib;
http://link.springer-ny.com/link/service/series/0558/tocs/t2150.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2150/21500380.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2150/21500380.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Angskun:2001:DPM,
author = "Thara Angskun and Putchong Uthayopas and Arnon
Rungsawang",
title = "Dynamic Process Management in {KSIX} Cluster
Middleware",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "209--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310209.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310209.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Anonymous:2001:AAL,
author = "Anonymous",
title = "Appendixes: Appendix {A}: {Linux}, {Windows NT},
{AIX}, {Solaris}; Appendix {B}: Compilers and
Preprocessors, {MPI} Implementations, Development
Environments, Debuggers, Performance Analyzers",
journal = j-IJHPCA,
volume = "15",
number = "2",
pages = "191--194",
month = "Summer",
year = "2001",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/109434200101500213",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue May 01 05:27:17 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://journals.sagepub.com/doi/pdf/10.1177/109434200101500213",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
xxmonth = may,
}
@Article{Anonymous:2001:EDP,
author = "Anonymous",
title = "Erratum: Design and Prototype of a Performance Tool
Interface for {OpenMP}",
journal = j-J-SUPERCOMPUTING,
volume = "23",
number = "1",
pages = "105--128",
month = may,
year = "2001",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1023/A:1015741304337",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Wed Jul 6 12:13:23 MDT 2005",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=23&issue=1;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=23&issue=1&spage=105",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Baiardi:2001:CRD,
author = "Fabrizio Baiardi and Paolo Mori and Laura Ricci",
title = "Collecting Remote Data in Irregular Problems with
Hierarchical Representation of the Domain",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "304--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310304.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310304.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Banikazemi:2001:MLE,
author = "Mohammad Banikazemi and Rama K. Govindaraju and Robert
Blackmore and Dhabaleswar K. Panda",
title = "{MPI-LAPI}: An Efficient Implementation of {MPI} for
{IBM RS\slash 6000 SP} Systems",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "12",
number = "10",
pages = "1081--1093",
month = oct,
year = "2001",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/71.963419",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Sat Feb 23 09:26:03 MST 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://dlib.computer.org/td/books/td2001/pdf/l1081.pdf;
http://www.computer.org/tpds/td2001/l1081abs.htm",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@Article{Baptista:2001:IOS,
author = "Tiago Baptista and Hernani Pedroso and Jo{\~a}o
Gabriel Silva",
title = "The Implementation of One-Sided Communications for
{WMPI II}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "61--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310061.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310061.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Bencheva:2001:MPI,
author = "G. Bencheva",
title = "{MPI} Parallel Implementation of a Fast Separable
Solver",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2179",
pages = "454--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:06:22 MST 2002",
bibsource = "file://sunset.math.utah.edu/a/suncore0/export/home/0073/sy/beebe/tex/bib/lncs2001c.bib;
http://link.springer-ny.com/link/service/series/0558/tocs/t2179.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2179/21790454.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2179/21790454.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Berthou:2001:COH,
author = "Jean-Yves Berthou and Eric Fayolle",
title = "Comparing {OpenMP}, {HPF}, and {MPI} Programming: a
Study Case",
journal = j-IJHPCA,
volume = "15",
number = "3",
pages = "297--309",
month = "Fall",
year = "2001",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Mon Nov 05 16:09:36 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Bhandarkar:2001:ALB,
author = "Milind Bhandarkar and L. V. Kal{\'e} and Eric de
Sturler and Jay Hoeflinger",
title = "Adaptive Load Balancing for {MPI} Programs",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2074",
pages = "108--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:04:30 MST 2002",
bibsource = "file://sunset.math.utah.edu/a/suncore0/export/home/0073/sy/beebe/tex/bib/lncs2001b.bib;
http://link.springer-ny.com/link/service/series/0558/tocs/t2074.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2074/20740108.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2074/20740108.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{biewski:2001:MOS,
author = "Maciej Go{\l}ebiewski and Jesper Larsson Tr{\"a}ff",
title = "{MPI-2} One-Sided Communications on a {Giganet SMP}
Cluster",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "16--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310016.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310016.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Blikberg:2001:NPA,
author = "Ragnhild Blikberg and Tor S{\o}revik",
title = "Nested parallelism: Allocation of threads to tasks and
{OpenMP} implementation",
journal = j-SCI-PROG,
volume = "9",
number = "2--3",
pages = "185--194",
month = "Spring--Summer",
year = "2001",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Thu Mar 28 12:27:27 MST 2002",
bibsource = "Compendex database;
http://www.iospress.nl/site/html/10589244.html;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sciprogram.bib;
OCLC Article1st database",
URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C11%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@Article{Booth:2001:OML,
author = "Stephen Booth",
title = "Optimising the {MPI} Library for the {T3E}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2150",
pages = "80--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:05:53 MST 2002",
bibsource = "file://sunset.math.utah.edu/a/suncore0/export/home/0073/sy/beebe/tex/bib/lncs2001c.bib;
http://link.springer-ny.com/link/service/series/0558/tocs/t2150.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2150/21500080.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2150/21500080.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Bova:2001:PPM,
author = "Steve W. Bova and Clay P. Breshears and Henry Gabb and
Bob Kuhn and Bill Magro and Rudolf Eigenmann and Greg
Gaertner and Stefano Salvini and Howard Scott",
title = "Parallel Programming with Message Passing and
Directives",
journal = j-COMPUT-SCI-ENG,
volume = "3",
number = "5",
pages = "22--37",
month = sep # "\slash " # oct,
year = "2001",
CODEN = "CSENFA",
DOI = "https://doi.org/10.1109/5992.947105",
ISSN = "1521-9615 (print), 1558-366X (electronic)",
ISSN-L = "1521-9615",
bibdate = "Sat Feb 23 06:37:33 MST 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://computer.org/cise/cs2001/c5022abs.htm;
http://dlib.computer.org/cs/books/cs2001/pdf/c5022.pdf",
acknowledgement = ack-nhfb,
fjournal = "Computing in Science and Engineering",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}
@Article{Brunst:2001:POL,
author = "Holger Brunst and Hans-Christian Hoppe and Wolfgang E.
Nagel and Manuela Winkler",
title = "Performance Optimization for Large Scale Computing:
The Scalable {VAMPIR} Approach",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2074",
pages = "751--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:04:30 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2074.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2074/20740751.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2074/20740751.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Bu:2001:PAC,
author = "Libor Bu{\v{s}} and Pavel Tvrd{\'\i}k",
title = "A Parallel Algorithm for Connected Components on
Distributed Memory Machines",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "280--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310280.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310280.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Bubak:2001:PMS,
author = "Marian Bubak and W{\l}odzimierz Funika and Bartosz
Bali{\'s} and Roland Wism{\"u}ller",
title = "Performance Measurement Support for {MPI} Applications
with {PATOP}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1947",
pages = "288--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:02:51 MST 2002",
bibsource = "file://sunset.math.utah.edu/a/suncore0/export/home/0073/sy/beebe/tex/bib/lncs2001a.bib;
http://link.springer-ny.com/link/service/series/0558/tocs/t1947.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1947/19470288.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1947/19470288.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Bull:2001:MSO,
author = "J. Mark Bull and Darragh O'Neill",
title = "A microbenchmark suite for {OpenMP 2.0}",
journal = j-COMP-ARCH-NEWS,
volume = "29",
number = "5",
pages = "41--48",
month = dec,
year = "2001",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
}
@Article{Cappello:2001:UPS,
author = "Franck Cappello and Olivier Richard and Daniel
Etiemble",
title = "Understanding performance of {SMP} clusters running
{MPI} programs",
journal = j-FUT-GEN-COMP-SYS,
volume = "17",
number = "6",
pages = "711--720",
month = apr,
year = "2001",
CODEN = "FGSEVI",
ISSN = "0167-739X (print), 1872-7115 (electronic)",
ISSN-L = "0167-739X",
bibdate = "Wed Feb 27 12:41:21 MST 2002",
bibsource = "http://www.elsevier.com/locate/issn/0167739X;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.elsevier.com/gej-ng/10/19/19/45/33/30/abstract.html",
acknowledgement = ack-nhfb,
fjournal = "Future Generation Computer Systems",
journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Caubet:2001:DTM,
author = "Jordi Caubet and Judit Gimenez and Jesus Labarta and
Luiz DeRose",
title = "A Dynamic Tracing Mechanism for Performance Analysis
of {OpenMP} Applications",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2104",
pages = "53--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:05:04 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040053.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2104/21040053.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Book{Chandra:2001:PPO,
author = "Rohit Chandra and Leonardo Dagum and David Kohr and
Dror Maydan and Jeff McDonald and Ramesh Menon",
title = "Parallel Programming in {OpenMP}",
publisher = pub-MORGAN-KAUFMANN,
address = pub-MORGAN-KAUFMANN:adr,
pages = "xvi + 230",
year = "2001",
ISBN = "1-55860-671-8",
ISBN-13 = "978-1-55860-671-5",
LCCN = "QA76.642 .P38 2001",
bibdate = "Thu Jul 14 11:09:17 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/unix.bib",
price = "US\$39.95",
URL = "http://www.mkp.com/books_catalog/catalog.asp?ISBN=1-55860-671-8",
abstract = "The rapid and widespread acceptance of shared memory
multiprocessor architectures has created a pressing
demand for an efficient way to program these systems.
At the same time, developers of technical and
scientific applications in industry and in government
laboratories find they need to parallelize huge volumes
of code in a portable fashion. OpenMP, developed
jointly by several parallel computing vendors to
address these issues, is an industry-wide standard for
programming shared-memory and distributed shared-memory
multiprocessors. It consists of a set of compiler
directives and library routines that extend FORTRAN, C,
and C++ codes to express shared-memory parallelism.
Parallel Programming in OpenMP is the first book to
teach both the novice and expert parallel programmers
how to program using this new standard. The authors,
who helped design and implement OpenMP while at SGI,
bring a depth and breadth to the book as compiler
writers, application developers, and performance
engineers.",
acknowledgement = ack-nhfb,
keywords = "parallel programming (computer science)",
tableofcontents = "Foreword \\
Preface \\
1: Introduction \\
Performance with OpenMP \\
A first glimpse of OpenMP \\
The OpenMP parallel computer \\
Why OpenMP \\
History of OpenMP \\
Navigating the rest of the book \\
2: Getting started with OpenMP \\
3: Exploiting loop-level parallelism \\
Meaning of the parallel do directive \\
Controlling data sharing \\
Removing data dependences \\
Enhancing performance \\
4: Beyond loop-level parallelism, parallel regions \\
5: Synchronization \\
6: Performance",
}
@Article{Chapman:2001:PDE,
author = "B. Chapman and O. Hernandez and A. Patil and A.
Prabhakar",
title = "Program Development Environment for {OpenMP} Programs
on {ccNUMA} Architectures",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2179",
pages = "210--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:06:22 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2179.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2001c.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2179/21790210.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2179/21790210.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Chen:2001:FFT,
author = "Qun Chen and Michael C. Ferris",
title = "{FATCOP}: a Fault Tolerant {Condor--PVM} Mixed Integer
Programming Solver",
journal = j-SIAM-J-OPT,
volume = "11",
number = "4",
pages = "1019--1036",
month = mar # "\slash " # may,
year = "2001",
CODEN = "SJOPE8",
DOI = "https://doi.org/10.1137/S1052623499353911",
ISSN = "1052-6234 (print), 1095-7189 (electronic)",
ISSN-L = "1052-6234",
MRclass = "90C11 (65K05)",
MRnumber = "MR1855219 (2002f:90068)",
bibdate = "Sat Oct 4 12:16:05 MDT 2003",
bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SIOPT/11/4;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
MathSciNet database",
URL = "http://epubs.siam.org/sam-bin/dbq/article/35391",
acknowledgement = ack-nhfb,
fjournal = "SIAM Journal on Optimization",
journal-URL = "http://epubs.siam.org/siopt",
}
@Article{Chen:2001:TMK,
author = "Yu Chen and Qian Fang and Zhihui Du and Sanli Li",
title = "{TH-MPI}: {OS} Kernel Integrated Fault Tolerant
{MPI}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "75--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310075.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310075.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Czarnul:2001:DPD,
author = "Pawel Czarnul and Karen Tomko and Henryk Krawczyk",
title = "Dynamic Partitioning of the Divide-and-Conquer Scheme
with Migration in {PVM} Environment",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "174--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310174.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310174.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Darema:2001:SMP,
author = "Frederica Darema",
title = "The {SPMD} Model: Past, Present and Future",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "1--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310001.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310001.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Dehne:2001:CPD,
author = "Frank Dehne and Todd Eavis and Andrew Rau-Chaplin",
title = "Computing Partial Data Cubes for Parallel Data
Warehousing Applications",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "319--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310319.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310319.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Demaine:2001:GCM,
author = "E. D. Demaine and I. Foster and C. Kesselman and M.
Snir",
title = "Generalized Communicators in the Message Passing
Interface",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "12",
number = "6",
pages = "610--616",
month = jun,
year = "2001",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/71.932714",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Fri Jul 20 11:51:59 MDT 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://dlib.computer.org/td/books/td2001/pdf/l0610.pdf;
http://www.computer.org/tpds/td2001/l0610abs.htm",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@Article{Denis:2001:THP,
author = "Alexandre Denis and Christian P{\'e}rez and Thierry
Priol",
title = "Towards High Performance {CORBA} and {MPI} Middlewares
for Grid Computing",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2242",
pages = "14--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Tue Sep 10 19:09:01 MDT 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2242.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2242/22420014.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2242/22420014.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{DiMartino:2001:WDS,
author = "Beniamino {Di Martino} and Sergio Briguglio and
Gregorio Vlad and Giuliana Fogaccia",
title = "Workload decomposition strategies for shared memory
parallel systems with {OpenMP}",
journal = j-SCI-PROG,
volume = "9",
number = "2--3",
pages = "109--122",
month = "Spring--Summer",
year = "2001",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Thu Mar 28 12:27:27 MST 2002",
bibsource = "Compendex database;
http://www.iospress.nl/site/html/10589244.html;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sciprogram.bib;
OCLC Article1st database",
URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C5%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@Article{Elwasif:2001:AMT,
author = "Wael R. Elwasif and David E. Bernholdt and James A.
Kohl and G. A. Geist",
title = "An Architecture for a Multi-threaded Harness Kernel",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "126--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310126.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310126.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Fagg:2001:FTM,
author = "Graham E. Fagg and Antonin Bukovsky and Jack J.
Dongarra",
title = "Fault Tolerant {MPI} for the {HARNESS} Meta-computing
System",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2073",
pages = "355--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:04:28 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2073.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2073/20730355.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2073/20730355.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Fagg:2001:HFT,
author = "Graham E. Fagg and Antonin Bukovsky and Jack J.
Dongarra",
title = "{HARNESS} and fault tolerant {MPI}",
journal = j-PARALLEL-COMPUTING,
volume = "27",
number = "11",
pages = "1479--1495",
month = oct,
year = "2001",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Fri Feb 22 16:52:42 MST 2002",
bibsource = "http://www.elsevier.com/locate/issn/01678191;
http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.elsevier.com/gej-ng/10/35/21/47/41/32/abstract.html;
http://www.elsevier.nl/gej-ng/10/35/21/47/41/32/article.pdf;
http://www.netlib.org/utk/people/JackDongarra/PAPERS/harness-ftmpi-pc.pdf",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Fagg:2001:PIS,
  author          = {Graham E. Fagg and Edgar Gabriel and Michael Resch and
                     Jack J. Dongarra},
  title           = {Parallel {IO} Support for Meta-computing Applications:
                     {MPI\_Connect IO} Applied to {PACX--MPI}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {135--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310135.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310135.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Ferschweiler:2001:CDP,
  author          = {Ken Ferschweiler and Mariacarla Calzarossa and Cherri
                     Pancake and Daniele Tessera and Dylan Keon},
  title           = {A Community Databank for Performance Tracefiles},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {233--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310233.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310233.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Field:2001:RTF,
  author          = {Antony J. Field and Thomas L. Hansen and Paul H. J.
                     Kelly},
  title           = {Run-Time Fusion of {MPI} Calls in a Parallel {C++}
                     Library},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2017},
  pages           = {363--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:03:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2017.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2017/20170363.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2017/20170363.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Fischer:2001:DNM,
  author          = {Markus Fischer and Peter Kemper},
  title           = {Distributed Numerical {Markov} Chain Analysis},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {272--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310272.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310272.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Fischer:2001:SAN,
  author          = {Markus Fischer},
  title           = {System Area Network Extensions to the Parallel Virtual
                     Machine},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {98--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310098.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310098.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Friedel:2001:HMC,
  author          = {Peter Friedel and J{\"o}rg Bergmann and Stephan Seidl
                     and Wolfgang E. Nagel},
  title           = {An Hierarchical {MPI} Communication Model for the
                     Parallelized Solution of Multiple Integrals},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2110},
  pages           = {474--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:05:11 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2110.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2110/21100474.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2110/21100474.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gaito:2001:ADC,
  author          = {A. Gaito and M. Rak and U. Villano},
  title           = {Adding Dynamic Coscheduling Support to {PVM}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {106--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310106.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310106.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gallud:2001:EDF,
  author          = {J. A. Gallud and J. Garc{\'\i}a-Consuegra and J. M.
                     Garc{\'\i}a and L. Orozco},
  title           = {Evaluating the {DIPORSI} Framework: Distributed
                     Processing of Remotely Sensed Imagery},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {401--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310401.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310401.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Geist:2001:BFN,
  author          = {G. Al Geist},
  title           = {Building a Foundation for the Next {PVM}:
                     {Petascale Virtual Machines}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {2--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310002.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310002.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gerlach:2001:IOJ,
  author          = {Jens Gerlach and Zheng-Yu Jiang and Hans-Werner Pohl},
  title           = {Integrating {OpenMP} into {Janus}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2104},
  pages           = {101--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:05:04 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040101.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2104/21040101.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gine:2001:MMM,
  author          = {Francesc Gin{\'e} and Francesc Solsona and Xavi Navarro
                     and Porfidio Hern{\'a}ndez and Emilio Luque},
  title           = {{MemTo}: a Memory Monitoring Tool for a {Linux}
                     Cluster},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {225--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310225.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310225.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Golbiewski:2001:MOS,
  author          = {Maciej Go{\l}biewski and Jesper Larsson Tr{\"a}ff},
  title           = {{MPI-2} One-Sided Communications on a {Giganet SMP}
                     Cluster},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {16--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310016.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310016.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gonzalez:2001:DSP,
  author          = {M. Gonzalez and E. Ayguad{\'e} and X. Martorell and
                     J. Labarta},
  title           = {Defining and Supporting Pipelined Executions in
                     {OpenMP}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2104},
  pages           = {155--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:05:04 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040155.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2104/21040155.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gonzalez:2001:MIM,
  author          = {J. A. Gonz{\'a}lez and C. Le{\'o}n and C. Rodr{\'\i}guez
                     and F. Sande},
  title           = {A Model to Integrate Message Passing and Shared Memory
                     Programming},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {114--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310114.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310114.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
%%% The surname Ayguad{\'e} carries its acute accent here, matching the
%%% spelling used for the same author in other entries of this file.
@Article{Gonzalez:2001:OET,
  author          = {Marc Gonzalez and Jose Oliver and Xavier Martorell and
                     Eduard Ayguad{\'e} and Jesus Labarta and Nacho Navarro},
  title           = {{OpenMP} Extensions for Thread Groups and Their
                     Run-Time Support},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2017},
  pages           = {324--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:03:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2017.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2001a.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2017/20170324.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2017/20170324.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gorzig:2001:CCP,
  author          = {Steffen G{\"o}rzig},
  title           = {{CPPvm} --- {C++} and {PVM}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {83--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310083.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310083.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gropp:2001:CSA,
  author          = {William D. Gropp},
  title           = {Challenges and Successes in Achieving the Potential of
                     {MPI}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {7--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310007.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310007.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gropp:2001:LSM,
  author          = {William D. Gropp},
  title           = {Learning from the Success of {MPI}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2228},
  pages           = {81--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:07:14 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2228.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2228/22280081.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2228/22280081.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Hoeflinger:2001:IPV,
  author          = {Jay Hoeflinger and Bob Kuhn and Wolfgang Nagel and Paul
                     Petersen and Hrabri Rajic and Sanjiv Shah and Jeff
                     Vetter and Michael Voss and Renee Woo},
  title           = {An Integrated Performance Visualizer for
                     {MPI\slash OpenMP} Programs},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2104},
  pages           = {40--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:05:04 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040040.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2104/21040040.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Hoeflinger:2001:PSP,
  author          = {Jay Hoeflinger and Prasad Alavilli and Thomas Jackson
                     and Bob Kuhn},
  title           = {Producing scalable performance with {OpenMP}:
                     {Experiments} with two {CFD} applications},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {27},
  number          = {4},
  pages           = {391--413},
  month           = mar,
  year            = {2001},
  CODEN           = {PACOEJ},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Wed Jul 18 06:31:15 MDT 2001},
  bibsource       = {http://www.elsevier.com/locate/issn/01678191;
                     http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.elsevier.nl/gej-ng/10/35/21/47/28/26/abstract.html;
                     http://www.elsevier.nl/gej-ng/10/35/21/47/28/26/article.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Hu:2001:PCC,
  author          = {Hong Hu and Edward L. Turner},
  title           = {Parallel {CFD} Computing Using Shared Memory {OpenMP}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2073},
  pages           = {1137--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:04:28 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2073.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2073/20731137.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2073/20731137.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Huband:2001:DTB,
  author          = {Simon Huband and Chris McDonald},
  title           = {{DEPICT}: a Topology-Based Debugger for {MPI} Programs},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2026},
  pages           = {109--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:03:43 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2026.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2026/20260109.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2026/20260109.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Huse:2001:LST,
  author          = {Lars Paul Huse},
  title           = {Layering {SHMEM} on Top of {MPI}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {44--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310044.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310044.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Ilroy:2001:IMP,
  author          = {Jonathan Ilroy and Cyrille Randriamaro and Gil Utard},
  title           = {Improving {MPI-I/O} Performance on {PVFS}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2150},
  pages           = {911--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:05:53 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2150.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2150/21500911.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2150/21500911.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Iwama:2001:PLS,
  author          = {Kazuo Iwama and Daisuke Kawai and Shuichi Miyazaki and
                     Yasuo Okabe and Jun Umemoto},
  title           = {Parallelizing Local Search for {CNF} Satisfiability
                     Using Vectorization and {PVM}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {1982},
  pages           = {123--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:03:03 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t1982.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/1982/19820123.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/1982/19820123.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Jorba:2001:SFF,
  author          = {Josep Jorba and Tom{\`a}s Margalef and Emilio Luque},
  title           = {Simulation of Forest Fire Propagation on Parallel {\&}
                     Distributed {PVM} Platforms},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {386--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310386.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310386.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Kaiser:2001:OCC,
  author          = {Timothy H. Kaiser and Scott B. Baden},
  title           = {Overlapping communication and computation with
                     {OpenMP} and {MPI}},
  journal         = j-SCI-PROG,
  volume          = {9},
  number          = {2--3},
  pages           = {73--81},
  month           = {Spring--Summer},
  year            = {2001},
  CODEN           = {SCIPEV},
  ISSN            = {1058-9244 (print), 1875-919X (electronic)},
  ISSN-L          = {1058-9244},
  bibdate         = {Thu Mar 28 12:27:27 MST 2002},
  bibsource       = {Compendex database;
                     http://www.iospress.nl/site/html/10589244.html;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/sciprogram.bib;
                     OCLC Article1st database},
  URL             = {http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C2%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1},
  acknowledgement = ack-nhfb,
  fjournal        = {Scientific Programming},
  journal-URL     = {http://iospress.metapress.com/content/1058-9244},
}
@Article{Kambites:2001:OLI,
  author          = {M. E. Kambites and J. Obdr{\v{z}}{\'a}lek and J. M.
                     Bull},
  title           = {An {OpenMP}-like interface for parallel programming in
                     {Java}},
  journal         = j-CCPE,
  volume          = {13},
  number          = {8--9},
  pages           = {793--814},
  month           = jul # "\slash " # aug,
  year            = {2001},
  CODEN           = {CCPEBO},
  DOI             = {https://doi.org/10.1002/cpe.579},
  ISSN            = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L          = {1532-0626},
  bibdate         = {Wed Jul 25 10:55:47 MDT 2001},
  bibsource       = {http://www.interscience.wiley.com/jpages/1532-0626;
                     http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www3.interscience.wiley.com/journalfinder.html},
  URL             = {http://www3.interscience.wiley.com/cgi-bin/abstract/84503220/START;
                     http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=84503220&PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Prac\-tice and Experience},
  journal-URL     = {http://www.interscience.wiley.com/jpages/1532-0626},
}
@Article{Kasahara:2001:ACG,
  author          = {Hironori Kasahara and Motoki Obata and Kazuhisa
                     Ishizaka},
  title           = {Automatic Coarse Grain Task Parallel Processing on
                     {SMP} Using {OpenMP}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2017},
  pages           = {189--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:03:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2017.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2001a.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2017/20170189.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2017/20170189.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Kobler:2001:DOP,
  author          = {Rene Kobler and Dieter Kranzlm{\"u}ller and Jens
                     Volkert},
  title           = {Debugging {OpenMP} Programs Using Event Manipulation},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2104},
  pages           = {81--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:05:04 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040081.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2104/21040081.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Konstantinou:2001:TTO,
  author          = {Dimitris Konstantinou and Nectarios Koziris and George
                     Papakonstantinou},
  title           = {{TOPPER}: a Tool for Optimizing the Performance of
                     Parallel Applications},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {148--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310148.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310148.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Kranzlmuller:2001:IRM,
  author          = {Dieter Kranzlm{\"u}ller and Christian
                     Schaubschl{\"a}ger and Jens Volkert},
  title           = {An Integrated Record{\&}Replay Mechanism for
                     Nondeterministic Message Passing Programs},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {192--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310192.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310192.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Krawczyk:2001:PIM,
  author          = {Henryk Krawczyk and Jamil Saif},
  title           = {Parallel Image Matching on {PC} Cluster},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {312--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310312.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310312.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
%%% The trademark sign after PENTRAN is typeset with the \TM macro, which
%%% the preamble at the top of this file defines.
@Article{Kucukboyaci:2001:PPT,
  author          = {Vefa Kucukboyaci and Alireza Haghighat and Glenn E.
                     Sjoden},
  title           = {Performance of {PENTRAN\TM{}} {$3$-D} Parallel Particle
                     Transport Code on the {IBM SP2} and {PCTRAN} Cluster},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {36--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310036.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310036.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Kusano:2001:OOC,
  author          = {Kazuhiro Kusano and Mitsuhisa Sato and Takeo Hosomi and
                     Yoshiki Seo},
  title           = {The {Omni OpenMP} Compiler on the Distributed Shared
                     Memory of {Cenju-4}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2104},
  pages           = {20--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:05:04 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040020.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2104/21040020.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Labarta:2001:NOD,
  author          = {J. Labarta and J. Oliver and D. S. Henty and Eduard
                     Ayguad{\'e}},
  title           = {New {OpenMP} directives for irregular data access
                     loops},
  journal         = j-SCI-PROG,
  volume          = {9},
  number          = {2--3},
  pages           = {175--183},
  month           = {Spring--Summer},
  year            = {2001},
  CODEN           = {SCIPEV},
  ISSN            = {1058-9244 (print), 1875-919X (electronic)},
  ISSN-L          = {1058-9244},
  bibdate         = {Thu Mar 28 12:27:27 MST 2002},
  bibsource       = {Compendex database;
                     http://www.iospress.nl/site/html/10589244.html;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/sciprogram.bib;
                     OCLC Article1st database},
  URL             = {http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C10%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1},
  acknowledgement = ack-nhfb,
  fjournal        = {Scientific Programming},
  journal-URL     = {http://iospress.metapress.com/content/1058-9244},
  xxauthor        = {J. Labarta and E. Ayguad{\'e} and J. Oliver and others},
}
@Article{Laforenza:2001:PHP,
  author          = {Domenico Laforenza},
  title           = {Programming High Performance Applications in Grid
                     Environments},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2131},
  pages           = {8--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Fri Feb 1 08:13:55 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310008.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2131/21310008.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Lee:2001:APT,
  author          = {D. J. Lee and T. J. Downar},
  title           = {The Application of {POSIX} Threads and {OpenMP} to the
                     {U.S. NRC} Neutron Kinetics Code {PARCS}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2104},
  pages           = {90--??},
  year            = {2001},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Feb 2 13:05:04 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040090.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2104/21040090.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
%%% NOTE(review): the title uses \pkg, which is not among the macros set up
%%% by the preamble at the top of this file; confirm that it is defined
%%% elsewhere (or by the citing document) before typesetting this entry.
@Article{Li:2001:PCS,
  author          = {Michael Na Li and A. J. Rossini},
  title           = {\pkg{RPVM}: Cluster Statistical Computing in {R}},
  journal         = j-R-NEWS,
  volume          = {1},
  number          = {3},
  pages           = {4--7},
  month           = sep,
  year            = {2001},
  CODEN           = {????},
  ISSN            = {1609-3631},
  ISSN-L          = {1609-3631},
  bibdate         = {Thu Aug 13 09:25:10 2015},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/rjournal.bib},
  URL             = {http://CRAN.R-project.org/doc/Rnews/},
  acknowledgement = ack-r-project,
  fjournal        = {R News: the Newsletter of the R Project},
  journal-URL     = {http://journal.r-project.org/},
  pdf             = Rnews2001-3,
}
@Article{Li:2001:WMB,
  author          = {Maozhen Li and Omer F. Rana and David W. Walker},
  title           = {Wrapping {MPI}-based legacy codes as
                     {Java\slash CORBA} components},
  journal         = j-FUT-GEN-COMP-SYS,
  volume          = {18},
  number          = {2},
  pages           = {213--223},
  month           = oct,
  year            = {2001},
  CODEN           = {FGSEVI},
  ISSN            = {0167-739X (print), 1872-7115 (electronic)},
  ISSN-L          = {0167-739X},
  bibdate         = {Wed Feb 27 12:41:22 MST 2002},
  bibsource       = {http://www.elsevier.com/locate/issn/0167739X;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.elsevier.com/gej-ng/10/19/19/60/31/29/abstract.html},
  acknowledgement = ack-nhfb,
  fjournal        = {Future Generation Computer Systems},
  journal-URL     = {http://www.sciencedirect.com/science/journal/0167739X},
}
@Article{Luecke:2001:SPO,
  author          = {Glenn R. Luecke and Wei-Hua Lin},
  title           = {Scalability and performance of {OpenMP} and {MPI} on a
                     128-processor {SGI Origin 2000}},
  journal         = j-CCPE,
  volume          = {13},
  number          = {10},
  pages           = {905--928},
  day             = {25},
  month           = aug,
  year            = {2001},
  CODEN           = {CCPEBO},
  DOI             = {https://doi.org/10.1002/cpe.588},
  ISSN            = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L          = {1532-0626},
  bibdate         = {Mon Feb 25 14:51:23 MST 2002},
  bibsource       = {http://www.interscience.wiley.com/jpages/1532-0626;
                     http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www3.interscience.wiley.com/journalfinder.html},
  URL             = {http://www3.interscience.wiley.com/cgi-bin/abstract/85007180/START;
                     http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=85007180&PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Prac\-tice and Experience},
  journal-URL     = {http://www.interscience.wiley.com/jpages/1532-0626},
}
@Article{Luo:2001:PDE,
author = "Jun Luo and Sanguthevar Rajasekaran and Chenxia Qiu",
title = "Parallelizing $1$-Dimensional Estuarine Model",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "257--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310257.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310257.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Macias:2001:PPA,
author = "Elsa M. Mac{\'\i}as and Alvaro Su{\'a}rez and C. N.
Ojeda-Guerra and E. Robayna",
title = "Programming Parallel Applications with {LAMGAC} in a
{LAN--WLAN} Environment",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "158--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310158.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310158.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Malfetti:2001:AOW,
author = "Paolo Malfetti",
title = "Application of {OpenMP} to weather, wave and ocean
codes",
journal = j-SCI-PROG,
volume = "9",
number = "2--3",
pages = "99--107",
month = "Spring--Summer",
year = "2001",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Thu Mar 28 12:27:27 MST 2002",
bibsource = "Compendex database;
http://www.iospress.nl/site/html/10589244.html;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sciprogram.bib;
OCLC Article1st database",
URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C4%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@Article{Manis:2001:PNP,
author = "G. Manis",
title = "Persistent and Non-persistent Data Objects on Top of
{PVM} and {MPI}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "91--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310091.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310091.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Matthey:2001:EMO,
author = "T. Matthey and J. P. Hansen",
title = "Evaluation of {MPI}'s One-Sided Communication
Mechanism for Short-Range Molecular Dynamics on the
{Origin2000}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1947",
pages = "356--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:02:51 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1947.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1947/19470356.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1947/19470356.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Mattson:2001:EO,
author = "Timothy Mattson",
title = "The Evolution of {OpenMP}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1947",
pages = "19--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:02:51 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1947.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2001a.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1947/19470019.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1947/19470019.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Matuszek:2001:APS,
author = "Mariusz R. Matuszek",
title = "Assessment of {PVM} Suitability to Testbed
Client-Agent-Server Applications",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "69--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310069.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310069.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Michailidis:2001:TSH,
author = "Panagiotis D. Michailidis and Konstantinos G.
Margaritis",
title = "Text Searching on a Heterogeneous Cluster of
Workstations",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "378--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310378.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310378.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Min:2001:PCO,
author = "Seung Jai Min and Seon Wook Kim and Michael Voss and
Sang Ik Lee and Rudolf Eigenmann",
title = "Portable Compilers for {OpenMP}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2104",
pages = "11--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:05:04 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040011.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2104/21040011.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Moore:2001:RPA,
author = "Shirley Moore and David Cronk and Kevin London and
Jack Dongarra",
title = "Review of Performance Analysis Tools for {MPI}
Parallel Programs",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "241--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310241.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310241.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Moreno:2001:AEP,
author = "Luz Marina Moreno and Francisco Almeida and Daniel
Gonz{\'a}lez and Casiano Rodr{\'\i}guez",
title = "Adaptive Execution of Pipelines",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "217--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310217.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310217.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Muller:2001:SSO,
author = "Matthias M{\"u}ller",
title = "Some Simple {OpenMP} Optimization Techniques",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2104",
pages = "31--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:05:04 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040031.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2104/21040031.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Neophytou:2001:NDW,
author = "Neophytos Neophytou and Paraskevas Evripidou",
title = "{Net-dbx}: a {Web}-Based Debugger of {MPI} Programs
Over Low-Bandwidth Lines",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "12",
number = "9",
pages = "986--995",
month = sep,
year = "2001",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/71.954636",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Sat Feb 23 09:26:03 MST 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://dlib.computer.org/td/books/td2001/pdf/l0986.pdf;
http://www.computer.org/tpds/td2001/l0986abs.htm",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@Article{Nicolescu:2001:DTP,
author = "Cristina Nicolescu and Pieter Jonker",
title = "A Data and Task Parallel Image Processing
Environment",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "393--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310393.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310393.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Nikolopoulos:2001:EMA,
author = "D. S. Nikolopoulos and E. Artiaga and E. Ayguad{\'e}
and J. Labarta",
title = "Exploiting memory affinity in {OpenMP} through
schedule reuse",
journal = j-COMP-ARCH-NEWS,
volume = "29",
number = "5",
pages = "49--55",
month = dec,
year = "2001",
CODEN = "CANED2",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri May 12 09:41:22 MDT 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
}
@Article{Nikolopoulos:2001:SID,
author = "Dimitrios S. Nikolopoulos and Eduard Ayguad{\'e}",
title = "A Study of Implicit Data Distribution Methods for
{OpenMP} Using the {SPEC} Benchmarks",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2104",
pages = "115--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:05:04 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040115.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2104/21040115.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Okulicka-Dluzewska:2001:PFE,
author = "Felicja Okulicka-D{\l}uzewska",
title = "Parallelization of Finite Element Package by {MPI}
Library",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "427--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310427.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310427.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Ong:2001:SUC,
author = "Emil Ong and Ewing Lusk and William Gropp",
title = "Scalable {Unix} Commands for Parallel Processors: a
High-Performance Implementation",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "410--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310410.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310410.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Pagourtzis:2001:PCT,
author = "Aris Pagourtzis and Igor Potapov and Wojciech Rytter",
title = "{PVM} Computation of the Transitive Closure: The
Dependency Graph Approach",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "249--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310249.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310249.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Papadopoulos:2001:NRC,
author = "Philip M. Papadopoulos and Mason J. Katz and Greg
Bruno",
title = "{NPACI} Rocks Clusters: Tools for Easily Deploying and
Maintaining Manageable High-Performance {Linux}
Clusters",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "10--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310010.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310010.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Park:2001:CSL,
author = "So-Hee Park and Mi-Young Park and Yong-Kee Jun",
title = "A Comparison of Scalable Labeling Schemes for
Detecting Races in {OpenMP} Programs",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2104",
pages = "68--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:05:04 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040068.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2104/21040068.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Park:2001:PPE,
author = "Insung Park and Michael J. Voss and Seon Wook Kim and
Rudolf Eigenmann",
title = "Parallel programming environment for {OpenMP}",
journal = j-SCI-PROG,
volume = "9",
number = "2--3",
pages = "143--161",
month = "Spring--Summer",
year = "2001",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Thu Mar 28 12:27:27 MST 2002",
bibsource = "Compendex database;
http://www.iospress.nl/site/html/10589244.html;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sciprogram.bib;
OCLC Article1st database",
URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C8%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
xxpages = "143--162",
}
@Article{Pears:2001:DLB,
author = "Arnold N. Pears and Nicola Thong",
title = "A Dynamic Load Balancing Architecture for {PDES} Using
{PVM} on Clusters",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "166--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310166.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310166.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Pedroso:2001:WLE,
author = "Hern{\^a}ni Pedroso and Jo{\~a}o Gabriel Silva",
title = "The {WMPI} Library Evolution: Experience with {MPI}
Development for {Windows} Environments",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1900",
pages = "1157--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:02:44 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1900.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1900/19001157.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1900/19001157.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@TechReport{Petcu:2001:WMM,
author = "Dana Petcu",
title = "Working with Multiple {Maple} Kernels Connected by
{Distributed Maple} or {PVMaple}",
type = "Technical report",
institution = "Western University of Timisoara",
address = "Timisoara, Romania",
month = mar,
year = "2001",
bibdate = "Wed Dec 17 18:07:37 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.risc.uni-linz.ac.at/software/distmaple/index_1.html",
URL = "http://www.risc.uni-linz.ac.at/software/distmaple/misc/petcu2001.ps.gz",
acknowledgement = ack-nhfb,
keywords = "Distributed Maple; PVMaple",
}
@Article{Plagianakos:2001:LCP,
author = "V. P. Plagianakos and N. K. Nousis and M. N.
Vrahatis",
title = "Locating and computing in parallel all the simple
roots of special functions using {PVM}",
journal = j-J-COMPUT-APPL-MATH,
volume = "133",
number = "1--2",
pages = "545--554",
day = "1",
month = aug,
year = "2001",
CODEN = "JCAMDI",
DOI = "https://doi.org/10.1016/S0377-0427(00)00675-0",
ISSN = "0377-0427 (print), 1879-1778 (electronic)",
ISSN-L = "0377-0427",
bibdate = "Sat Feb 25 12:45:19 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/elefunt.bib;
http://www.math.utah.edu/pub/tex/bib/jcomputapplmath2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0377042700006750",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computational and Applied Mathematics",
journal-URL = "http://www.sciencedirect.com/science/journal/03770427",
}
@Article{Plunkett:2001:AMD,
author = "Craig L. Plunkett and Alfred G. Striz and J.
Sobieszczanski-Sobieski",
title = "Application of {MPI} in Displacement Based Multilevel
Structural Optimization",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "335--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310335.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310335.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Pringle:2001:TPF,
author = "Gavin J. Pringle and Steven P. Booth and Hugh M. P.
Couchman and Frazer R. Pearce and Alan D. Simpson",
title = "Towards a Portable, Fast Parallel {AP$^3$M-SPH} Code:
{HYDRA\_MPI}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "360--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310360.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310360.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@InProceedings{Prost:2001:MIG,
author = "Jean-Pierre Prost and Richard Treumann and Richard
Hedges and Bin Jia and Alice Koniges",
title = "{MPI-IO\slash GPFS}, an Optimized Implementation of
{MPI-IO} on top of {GPFS}",
crossref = "ACM:2001:SHP",
pages = "??--??",
year = "2001",
bibdate = "Sat Feb 10 14:28:55 2001",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sc2001.org/papers/pap.pap186.pdf",
acknowledgement = ack-nhfb,
pagecount = "15",
}
@Article{Prost:2001:THP,
author = "Jean-Pierre Prost and Richard Treumann and Richard
Hedges and Alice Koniges and Alison White",
title = "Towards a High-Performance Implementation of {MPI--IO}
on Top of {GPFS}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "1900",
pages = "1253--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:02:44 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t1900.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/1900/19001253.htm;
http://link.springer-ny.com/link/service/series/0558/papers/1900/19001253.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Protopopov:2001:MMP,
author = "Boris V. Protopopov and Anthony Skjellum",
title = "A Multithreaded {Message Passing Interface (MPI)}
Architecture: Performance and Program Issues",
journal = j-J-PAR-DIST-COMP,
volume = "61",
number = "4",
pages = "449--466",
day = "1",
month = apr,
year = "2001",
CODEN = "JPDCER",
DOI = "https://doi.org/10.1006/jpdc.2000.1674",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Fri Feb 22 15:30:36 MST 2002",
bibsource = "http://www.idealibrary.com/servlet/useragent?func=showAllIssues&curIssueID=jpdc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1674;
http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1674/pdf;
http://www.idealibrary.com/links/doi/10.1006/jpdc.2000.1674/ref",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Rabenseifner:2001:ECF,
author = "Rolf Rabenseifner and Alice E. Koniges",
title = "Effective Communication and File-{I/O} Bandwidth
Benchmarks",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "24--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310024.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310024.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@TechReport{Rageb:2001:CEM,
author = "Khaled Rageb and Wolfgang Rehm",
title = "{CHEMPI}: efficient {MPI} for {VIA\slash SCI}",
type = "{Preprint-Reihe des Chemnitzer}",
number = "{SFB 393}",
institution = "Technische Universit{\"a}t Chemnitz",
address = "Chemnitz, Germany",
pages = "12",
year = "2001",
bibdate = "Wed Aug 27 06:45:29 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Reinefeld:2001:CDI,
author = "Alexander Reinefeld",
title = "Clusters for Data-Intensive Applications in the Grid",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "12--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310012.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310012.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Reussner:2001:APP,
author = "Ralf Reussner and Gunnar Hunzelmann",
title = "Achieving Performance Portability with {SKaMPI} for
High-Performance {MPI} Programs",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2074",
pages = "841--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:04:30 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2074.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2074/20740841.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2074/20740841.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@TechReport{Reussner:2001:SSK,
author = "Ralf H. Reussner",
title = "{SKaMPI}: the special {Karlsruher} {MPI}-benchmark:
user manual",
type = "{Interner Bericht}",
number = "99,02",
institution = "Fakult{\"a}t f{\"u}r Informatik, Universit{\"a}t
Karlsruhe",
address = "Karlsruhe, Germany",
pages = "78",
year = "2001",
bibdate = "Wed Aug 27 06:47:26 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Roig:2001:EMM,
author = "Concepci{\'o} Roig and Ana Ripoll and Javier
Borr{\'a}s and Emilio Luque",
title = "Efficient Mapping for Message-Passing Applications
Using the {TTIG} Model: a Case Study in Image
Processing",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "370--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310370.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310370.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Roussos:2001:BMB,
author = "George Roussos and B. J. C. Baxter",
title = "Biharmonic Many Body Calculations for Fast Evaluation
of Radial Basis Function Interpolants in Cluster
Environments",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "288--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310288.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310288.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Rungsawang:2001:LCP,
author = "A. Rungsawang and A. Laohakanniyom and M.
Lertprasertkune",
title = "Low-Cost Parallel Text Retrieval Using {PC}-Cluster",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2131",
pages = "419--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Fri Feb 1 08:13:55 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310419.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2131/21310419.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Sahimi:2001:AAS,
author = "Mohd Salleh Sahimi and Norma Alias and Elankovan
Sundararajan",
title = "The {AGEB} Algorithm for Solving the Heat Equation in
Three Space Dimensions and Its Parallelization Using
{PVM}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2073",
pages = "918--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:04:28 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2073.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2073/20730918.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2073/20730918.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Sato:2001:CEO,
author = "Mitsuhisa Sato and Hiroshi Harada and Atsushi Hasegawa
and Yutaka Ishikawa",
title = "Cluster-enabled {OpenMP}: An {OpenMP} compiler for the
{SCASH} software distributed shared memory system",
journal = j-SCI-PROG,
volume = "9",
number = "2--3",
pages = "123--130",
month = "Spring--Summer",
year = "2001",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Thu Mar 28 12:27:27 MST 2002",
bibsource = "Compendex database;
http://www.iospress.nl/site/html/10589244.html;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sciprogram.bib;
OCLC Article1st database",
URL = "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C6%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@Article{Sato:2001:OGR,
author = "Mitsuhisa Sato and Motonari Hirano and Yoshio Tanaka
and Satoshi Sekiguchi",
title = "{OmniRPC}: a {Grid} {RPC} Facility for Cluster and
Global Computing in {OpenMP}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "2104",
pages = "130--??",
year = "2001",
CODEN = "LNCSD9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Sat Feb 2 13:05:04 MST 2002",
bibsource = "http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm;
http://www.math.utah.edu/pub/tex/bib/lncs2001b.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer-ny.com/link/service/series/0558/bibs/2104/21040130.htm;
http://link.springer-ny.com/link/service/series/0558/papers/2104/21040130.pdf",
acknowledgement = ack-nhfb,
fjournal = "Lecture Notes in Computer Science",
}
@Article{Satoh:2001:COT,
  author =       "Shigehisa Satoh and Kazuhiro Kusano and Mitsuhisa
                 Sato",
  title =        "Compiler optimization techniques for {OpenMP}
                 programs",
  journal =      j-SCI-PROG,
  volume =       "9",
  number =       "2--3",
  pages =        "131--142",
  month =        "Spring--Summer",
  year =         "2001",
  CODEN =        "SCIPEV",
  ISSN =         "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L =       "1058-9244",
  bibdate =      "Thu Mar 28 12:27:27 MST 2002",
  bibsource =    "Compendex database;
                 http://www.iospress.nl/site/html/10589244.html;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sciprogram.bib;
                 OCLC Article1st database",
  URL =          "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C7%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1",
  acknowledgement = ack-nhfb,
  fjournal =     "Scientific Programming",
  journal-URL =  "http://iospress.metapress.com/content/1058-9244",
}
@Article{Schevtschenko:2001:PAS,
  author =       "I. V. Schevtschenko",
  title =        "A Parallel {ADI} and Steepest Descent Methods",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "265--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310265.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310265.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Shan:2001:CMS,
  author =       "Hongzhang Shan and Jaswinder Pal Singh",
  title =        "A Comparison of {MPI}, {SHMEM} and Cache-Coherent
                 Shared Address Space Programming Models on a
                 Tightly-Coupled Multiprocessors",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "29",
  number =       "3",
  pages =        "283--318",
  month =        jun,
  year =         "2001",
  CODEN =        "IJPPE5",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Feb 20 09:55:15 MST 2002",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://ipsapp009.lwwonline.com/content/getfile/4773/21/3/abstract.htm;
                 http://ipsapp009.lwwonline.com/content/getfile/4773/21/3/fulltext.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Skjellum:2001:OOA,
  author =       "Anthony Skjellum and Diane G. Wooley and Ziyang Lu and
                 Michael Wolf and Purushotham V. Bangalore and Andrew
                 Lumsdaine and Jeffrey M. Squyres and Brian McCandless",
  title =        "Object-oriented analysis and design of the {Message
                 Passing Interface}",
  journal =      j-CCPE,
  volume =       "13",
  number =       "4",
  pages =        "245--292",
  day =          "10",
  month =        apr,
  year =         "2001",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.556",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Wed Jul 25 10:55:46 MDT 2001",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www3.interscience.wiley.com/journalfinder.html",
  URL =          "http://www3.interscience.wiley.com/cgi-bin/abstract/78502300/START;
                 http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=78502300&PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Prac\-tice and
                 Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Smith:2001:DMM,
  author =       "Lorna Smith and Mark Bull",
  title =        "Development of mixed mode {MPI\slash OpenMP}
                 applications",
  journal =      j-SCI-PROG,
  volume =       "9",
  number =       "2--3",
  pages =        "83--98",
  month =        "Spring--Summer",
  year =         "2001",
  CODEN =        "SCIPEV",
  ISSN =         "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L =       "1058-9244",
  bibdate =      "Thu Mar 28 12:27:27 MST 2002",
  bibsource =    "Compendex database;
                 http://www.iospress.nl/site/html/10589244.html;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sciprogram.bib;
                 OCLC Article1st database",
  URL =          "http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=7pab6qgbaf8vxg991rwy%26referrer=parent%26backto=issue%2C3%2C11%3Bjournal%2C1%2C9%3Blinkingpublicationresults%2C1%2C1",
  acknowledgement = ack-nhfb,
  fjournal =     "Scientific Programming",
  journal-URL =  "http://iospress.metapress.com/content/1058-9244",
}
@Article{Solsona:2001:IEI,
  author =       "Francesc Solsona and Francesc Gin{\'e} and Porfidio
                 Hern{\'a}ndez and Emilio Luque",
  title =        "Implementing Explicit and Implicit Coscheduling in a
                 {PVM} Environment (Research Note)",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1900",
  pages =        "1165--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Feb 2 13:02:44 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1900.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1900/19001165.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1900/19001165.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{SousaPinto:2001:PEI,
  author =       "Jorge {Sousa Pinto}",
  title =        "Parallel Evaluation of Interaction Nets with {MPINE}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2051",
  pages =        "353--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Feb 2 13:04:07 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2051.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2051/20510353.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2051/20510353.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Sunderam:2001:CAP,
  author =       "Vaidy Sunderam and Zsolt N{\'e}meth",
  title =        "A Comparative Analysis of {PVM\slash MPI} and
                 Computational {Grids}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "14--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310014.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310014.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Suppi:2001:PCS,
  author =       "Remo Suppi and Fernando Cores and Emilio Luque",
  title =        "{PDES}: a Case Study Using the Switch Time Warp",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "327--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310327.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310327.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Swann:2001:SPC,
  author =       "Christopher A. Swann",
  title =        "Software for parallel computing: the {LAM}
                 implementation of {MPI}",
  journal =      j-J-APPL-ECONOMETRICS,
  volume =       "16",
  number =       "2",
  pages =        "185--194",
  month =        mar # "--" # apr,
  year =         "2001",
  CODEN =        "JAECET",
  DOI =          "https://doi.org/10.1002/jae.595",
  ISSN =         "0883-7252 (print), 1099-1255 (electronic)",
  ISSN-L =       "0883-7252",
  bibdate =      "Sat Mar 9 10:20:01 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jappleconometrics.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Applied Econometrics",
  journal-URL =  "https://onlinelibrary.wiley.com/journal/10991255;
                 https://www.jstor.org/journal/japplecon",
  onlinedate =   "23 April 2001",
}
@Article{Takeda:2001:AME,
  author =       "K. Takeda and N. K. Allsopp and J. C. Hardwick and P.
                 C. Macey and D. A. Nicole and S. J. Cox and D. J.
                 Lancaster",
  title =        "An Assessment of {MPI} Environments for {Windows NT}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "19",
  number =       "3",
  pages =        "315--323",
  month =        jul,
  year =         "2001",
  CODEN =        "JOSUED",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Wed Jul 25 09:05:33 MDT 2001",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.wkap.nl/issuetoc.htm/0920-8542+19+3+2001",
  URL =          "http://www.wkap.nl/oasis.htm/338207",
  abstract =     "In this paper we evaluate the MPI environments
                 currently available for Windows NT on the Intel IA32
                 and Compaq DEC Alpha architectures. We present
                 benchmark results for low-level communication and for
                 the NAS Parallel Benchmarks to allow comparison with
                 other systems, but our primary interest is determining
                 real application performance and robustness in
                 production cluster environments. For this we use
                 PAFEC-FE, a large FORTRAN code for finite-element
                 analysis. We present results from three MPI
                 implementations, two architectures, and three
                 networking technologies (10 and 100 Mbit/s Ethernet and
                 1 Gbit/s Myrinet).",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Tinetti:2001:HNW,
  author =       "Fernando Tinetti and Antonio Quijano and Armando {De
                 Giusti} and Emilio Luque",
  title =        "Heterogeneous Networks of Workstations and the
                 Parallel Matrix Multiplication",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "296--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310296.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310296.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Tourancheau:2001:SMN,
  author =       "Bernard Tourancheau and Roland Westrelin",
  title =        "Support for {MPI} at the Network Interface Level",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "52--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310052.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310052.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Trobec:2001:IEM,
  author =       "R. Trobec and M. {\v{S}}terk and M. Praprotnik and D.
                 Jane{\v{z}}i{\v{c}}",
  title =        "Implementation and evaluation of {MPI}-based parallel
                 {MD} program",
  journal =      j-IJQC,
  volume =       "84",
  number =       "1",
  pages =        "23--31",
  month =        "????",
  year =         "2001",
  CODEN =        "IJQCB2",
  DOI =          "https://doi.org/10.1002/qua.1303",
  ISSN =         "0020-7608 (print), 1097-461X (electronic)",
  ISSN-L =       "0020-7608",
  bibdate =      "Wed Jul 25 09:32:26 MDT 2001",
  bibsource =    "http://www.interscience.wiley.com/jpages/0020-7608;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www3.interscience.wiley.com/journalfinder.html",
  URL =          "http://www3.interscience.wiley.com/cgi-bin/abstract/84002438/START;
                 http://www3.interscience.wiley.com/cgi-bin/fulltext/84002438/FILE?TPL=ftx_start;
                 http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=84002438&PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Quantum Chemistry",
  journal-URL =  "http://www.interscience.wiley.com/jpages/0020-7608/",
}
@Article{Uthayopas:2001:FSR,
  author =       "Putchong Uthayopas and Sugree Phatanapherom",
  title =        "Fast and Scalable Real-Time Monitoring System for
                 {Beowulf} Clusters",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "201--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310201.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310201.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Walker:2001:DLB,
  author =       "Reginald L. Walker",
  title =        "Dynamic Load Balancing Model: Preliminary Results for
                 Parallel Pseudo-search Engine Indexers\slash Crawler
                 Mechanisms Using {MPI} and Genetic Programming",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1981",
  pages =        "61--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Feb 2 13:03:02 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1981.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1981/19810061.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1981/19810061.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Walker:2001:SEC,
  author =       "Reginald L. Walker",
  title =        "Search engine case study: searching the {Web} using
                 genetic programming and {MPI}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "27",
  number =       "1--2",
  pages =        "71--89",
  month =        jan,
  year =         "2001",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Wed Jul 18 06:31:14 MDT 2001",
  bibsource =    "http://www.elsevier.com/locate/issn/01678191;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.elsevier.nl/gej-ng/10/35/21/47/25/25/abstract.html;
                 http://www.elsevier.nl/gej-ng/10/35/21/47/25/25/article.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Wismuller:2001:UMT,
  author =       "Roland Wism{\"u}ller",
  title =        "Using Monitoring Techniques to Support the Cooperation
                 of Software Components",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "183--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310183.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310183.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Wolf:2001:APA,
  author =       "Felix Wolf and Bernd Mohr",
  title =        "Automatic Performance Analysis of {MPI} Applications
                 Based on Event Traces",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "1900",
  pages =        "123--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Feb 2 13:02:44 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t1900.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/1900/19000123.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/1900/19000123.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Wu:2001:PCS,
  author =       "Guang Jun Wu and Robert Roy",
  title =        "Parallelization of Characteristics Solvers for {$3$D}
                 Neutron Transport",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2131",
  pages =        "344--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Fri Feb 1 08:13:55 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2131/21310344.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2131/21310344.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Yero:2001:JOO,
  author =       "Eduardo J. H. Yero and Marco A. A. Henriques and
                 Javier R. Garc{\'\i}a and Alina C. Leyva",
  title =        "{JOINT}: An Object Oriented Message Passing Interface
                 for Parallel Programming in {Java}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2110",
  pages =        "637--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Feb 2 13:05:11 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2110.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2110/21100637.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2110/21100637.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Zhang:2001:PPV,
  author =       "Xin Zhang and Lingli Ding and Elke A. Rundensteiner",
  title =        "{PVM}: {Parallel View Maintenance} under Concurrent
                 Data Updates of Distributed Sources",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2114",
  pages =        "230--??",
  year =         "2001",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Feb 2 13:05:16 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2114.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2114/21140230.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2114/21140230.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Zoltani:2001:EPO,
  author =       "Csaba K. Zoltani and Punyam Satya-narayana and Dixie
                 Hisley",
  title =        "Evaluating Performance of {OpenMP} and {MPI} on the
                 {SGI Origin 2000} with Benchmarks of Realistic Problem
                 Sizes",
  journal =      j-PARALLEL-DIST-COMP-PRACT,
  volume =       "4",
  number =       "4",
  pages =        "??--??",
  month =        dec,
  year =         "2001",
  CODEN =        "????",
  ISSN =         "1097-2803",
  bibdate =      "Thu Sep 2 12:08:56 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.scpe.org/content/4/4.toc",
  acknowledgement = ack-nhfb,
  fjournal =     "PDCP: Parallel and Distributed Computing Practices",
}
@Article{Acacio:2002:MDM,
  author =       "M. Acacio and O. C{\'a}novas and J. M. Garc{\'\i}a and
                 P. E. L{\'o}pez-de-Teruel",
  title =        "{MPI-Delphi}: an {MPI} implementation for visual
                 programming environments and heterogeneous computing",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "18",
  number =       "3",
  pages =        "317--333",
  month =        jan,
  year =         "2002",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Wed Feb 27 12:41:22 MST 2002",
  bibsource =    "http://www.elsevier.com/locate/issn/0167739X;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.elsevier.com/gej-ng/10/19/19/60/32/28/abstract.html",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Bane:2002:EOA,
  author =       "M. K. Bane and G. D. Riley",
  title =        "Extended Overhead Analysis for {OpenMP} (Research
                 Note)",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2400",
  pages =        "162--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Sep 10 19:10:14 MDT 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2400.htm;
                 http://www.math.utah.edu/pub/tex/bib/lncs2002c.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2400/24000162.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2400/24000162.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Basumallik:2002:TOE,
  author =       "Ayon Basumallik and Seung-Jai Min and Rudolf
                 Eigenmann",
  title =        "Towards {OpenMP} Execution on Software Distributed
                 Shared Memory Systems",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2327",
  pages =        "457--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Sep 10 19:09:32 MDT 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm;
                 http://www.math.utah.edu/pub/tex/bib/lncs2002a.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270457.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2327/23270457.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Bekas:2002:PCP,
  author =       "Constantine Bekas and Efrosini Kokiopoulou and
                 Efstratios Gallopoulos and Valeria Simoncini",
  title =        "Parallel Computation of Pseudospectra Using Transfer
                 Functions on a {MATLAB-MPI} Cluster Platform",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2474",
  pages =        "199--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Nov 30 20:57:35 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.de/link/service/series/0558/bibs/2474/24740199.htm;
                 http://link.springer.de/link/service/series/0558/papers/2474/24740199.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Bisseling:2002:FMF,
  author =       "Georg Bi{\ss}eling and Hans-Christian Hoppe and
                 Alexander Supalov and Pierre Lagier and Jean Latour",
  title =        "{Fujitsu MPI-2}: Fast Locally, Reaching Globally",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2474",
  pages =        "401--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Nov 30 20:57:35 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.de/link/service/series/0558/bibs/2474/24740401.htm;
                 http://link.springer.de/link/service/series/0558/papers/2474/24740401.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Blanco:2002:PMA,
  author =       "V. Blanco and L. Garc{\'\i}a and J. A. Gonz{\'a}lez
                 and C. Rodr{\'\i}guez and G. Rodr{\'\i}guez",
  title =        "A Performance Model for the Analysis of {OpenMP}
                 Programs",
  journal =      j-PARALLEL-DIST-COMP-PRACT,
  volume =       "5",
  number =       "2",
  pages =        "139--151",
  month =        jun,
  year =         "2002",
  CODEN =        "????",
  ISSN =         "1097-2803",
  bibdate =      "Thu Sep 2 12:08:56 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.scpe.org/content/5/2.toc",
  acknowledgement = ack-nhfb,
  fjournal =     "PDCP: Parallel and Distributed Computing Practices",
}
@InProceedings{Bosilca:2002:MVT,
  author =       "George Bosilca and Aurelien Bouteiller and Franck
                 Cappello and Samir Djilali and Gilles Fedak and Cecile
                 Germain and Thomas Herault and Pierre Lemarinier and
                 Oleg Lodygensky and Frederic Magniette and Vincent Neri
                 and Anton Selikhov",
  title =        "{MPICH-V}: Toward a Scalable Fault Tolerant {MPI} for
                 Volatile Nodes",
  crossref =     "IEEE:2002:STI",
  pages =        "??--??",
  year =         "2002",
  bibdate =      "Wed Nov 26 07:34:20 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sc-2002.org/paperpdfs/pap.pap298.pdf",
  abstract =     "Global Computing platforms, large scale clusters and
                 future TeraGRID systems gather thousands of nodes for
                 computing parallel scientific applications. At this
                 scale, node failures or disconnections are frequent
                 events. This Volatility reduces the MTBF of the whole
                 system in the range of hours or minutes. We present
                 MPICH-V, an automatic Volatility tolerant MPI
                 environment based on uncoordinated checkpoint/rollback
                 and distributed message logging. MPICH-V architecture
                 relies on Channel Memories, Checkpoint servers and
                 theoretically proven protocols to execute existing or
                 new, SPMD and Master-Worker MPI applications on
                 volatile nodes. To evaluate its capabilities, we run
                 MPICH-V within a framework for which the number of
                 nodes, Channels Memories and Checkpoint Servers can be
                 completely configured as well as the node Volatility.
                 We present a detailed performance evaluation of every
                 component of MPICH-V and its global performance for
                 non-trivial parallel applications. Experimental results
                 demonstrate good scalability and high tolerance to node
                 volatility.",
  acknowledgement = ack-nhfb,
}
@Article{Brightwell:2002:DIM,
  author =       "Ron Brightwell and Arthur B. Maccabe and Rolf Riesen",
  title =        "Design and Implementation of {MPI} on {Portals 3.0}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2474",
  pages =        "331--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Nov 30 20:57:35 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.de/link/service/series/0558/bibs/2474/24740331.htm;
                 http://link.springer.de/link/service/series/0558/papers/2474/24740331.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Brightwell:2002:RMR,
  author =       "Ron Brightwell",
  title =        "Ready-Mode Receive: An Optimized Receive Function for
                 {MPI}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2474",
  pages =        "385--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Nov 30 20:57:35 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.de/link/service/series/0558/bibs/2474/24740385.htm;
                 http://link.springer.de/link/service/series/0558/papers/2474/24740385.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Calderon:2002:IMI,
  author =       "Alejandro Calder{\'o}n and F{\'e}lix Garc{\'\i}a and
                 Jes{\'u}s Carretero and Jose M. P{\'e}rez and Javier
                 Fern{\'a}ndez",
  title =        "An Implementation of {MPI-IO} on Expand: a Parallel
                 File System Based on {NFS} Servers",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2474",
  pages =        "306--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Nov 30 20:57:35 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.de/link/service/series/0558/bibs/2474/24740306.htm;
                 http://link.springer.de/link/service/series/0558/papers/2474/24740306.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Chapman:2002:APU,
  author =       "B. Chapman and F. Bregier and A. Patil and A.
                 Prabhakar",
  title =        "Achieving performance under {OpenMP} on {ccNUMA} and
                 software distributed shared memory systems",
  journal =      j-CCPE,
  volume =       "14",
  number =       "8--9",
  pages =        "713--739",
  month =        jul # "\slash " # aug,
  year =         "2002",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.646",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Nov 9 12:24:19 MST 2002",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626;
                 http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www3.interscience.wiley.com/journalfinder.html",
  URL =          "http://www3.interscience.wiley.com/cgi-bin/abstract/95016122/START;
                 http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=95016122{\&}PLACEBO=IE.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Prac\-tice and
                 Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Chapman:2002:PAD,
  author =       "Barbara Chapman",
  title =        "Parallel Application Development with the Hybrid {MPI
                 $+$ OpenMP} Programming Model",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2474",
  pages =        "13--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Nov 30 20:57:35 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                 http://www.math.utah.edu/pub/tex/bib/lncs2002e.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.de/link/service/series/0558/bibs/2474/24740013.htm;
                 http://link.springer.de/link/service/series/0558/papers/2474/24740013.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Cotronis:2002:MMP,
  author =       "Yiannis Cotronis and Zacharias Tsiatsoulis",
  title =        "Modular {MPI} and {PVM} Components",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2474",
  pages =        "252--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Nov 30 20:57:35 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.de/link/service/series/0558/bibs/2474/24740252.htm;
                 http://link.springer.de/link/service/series/0558/papers/2474/24740252.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Czarnul:2002:DTI,
  author =       "Pawel Czarnul",
  title =        "Development and Tuning of Irregular Divide-and-Conquer
                 Applications in {DAMPVM\slash DAC}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2474",
  pages =        "208--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Sat Nov 30 20:57:35 MST 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.de/link/service/series/0558/bibs/2474/24740208.htm;
                 http://link.springer.de/link/service/series/0558/papers/2474/24740208.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{DeRose:2002:CCG,
  author =       "L. DeRose and F. Wolf",
  title =        "{CATCH} --- a Call-Graph Based Automatic Tool for
                 Capture of Hardware Performance Metrics for {MPI} and
                 {OpenMP} Applications",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2400",
  pages =        "167--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Sep 10 19:10:14 MDT 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2400.htm;
                 http://www.math.utah.edu/pub/tex/bib/lncs2002c.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2400/24000167.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2400/24000167.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@InProceedings{Ding:2002:MOP,
  author =       "Yun He and Chris H. Q. Ding",
  title =        "{MPI} and {OpenMP} Paradigms on Cluster of {SMP}
                 Architectures",
  crossref =     "IEEE:2002:STI",
  pages =        "??--??",
  year =         "2002",
  bibdate =      "Wed Nov 26 07:34:20 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/supercomputing2002.bib",
  URL =          "http://www.sc-2002.org/paperpdfs/pap.pap325.pdf",
  abstract =     "We investigate remapping multi-dimensional arrays on
                 cluster of SMP architectures under OpenMP, MPI, and
                 hybrid paradigms. Traditional method of array transpose
                 needs an auxiliary array of the same size and a copy
                 back stage. We recently developed an in-place method
                 using vacancy tracking cycles. The vacancy tracking
                 algorithm outperforms the traditional 2-array method as
                 demonstrated by extensive comparisons. The independence
                 of vacancy tracking cycles allows efficient
                 parallelization of the in-place method on SMP
                 architectures at node level. Performance of
                 multi-threaded parallelism using OpenMP are tested with
                 different scheduling methods and different number of
                 threads. The vacancy tracking method is parallelized
                 using several parallel paradigms. At node level, pure
                 OpenMP outperforms pure MPI by a factor of 2.76. Across
                 entire cluster of SMP nodes, the hybrid MPI/OpenMP
                 implementation outperforms pure MPI by a factor of
                 4.44, demonstrating the validity of the parallel
                 paradigm of mixing MPI with OpenMP.",
  acknowledgement = ack-nhfb,
  keywords =     "multidimensional arrays; index reshuffle; vacancy
                 tracking cycles; global exchange; dynamical remapping;
                 MPI; OpenMP; hybrid MPI/OpenMP; SMP cluster",
}
@Article{DiSerio:2002:ENN,
  author          = {Angela {Di Serio} and Mar{\'\i}a B. Ib{\'a}{\~n}ez},
  title           = {Evaluation of a Nearest-Neighbor Load Balancing
                     Strategy for Parallel Molecular Simulations in
                     {MPI} Environment},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {226--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740226.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740226.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
%%% The full journal name is title-cased, like the other `fjournal'
%%% values in this file.
@Article{Dow:2002:CMA,
  author =       "Chyi-Ren Dow and Jong-Shin Chen and Min-Chang Hsieh",
  title =        "Checkpointing {MPI} applications on symmetric
                 multi-processor machines using {SMPCkpt}",
  journal =      j-J-SYST-SOFTW,
  volume =       "63",
  number =       "2",
  pages =        "137--150",
  day =          "15",
  month =        aug,
  year =         "2002",
  CODEN =        "JSSODM",
  ISSN =         "0164-1212 (print), 1873-1228 (electronic)",
  ISSN-L =       "0164-1212",
  bibdate =      "Sat Oct 25 07:14:09 MDT 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Systems and Software",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01641212",
}
%%% The last two sentences of the abstract had been interleaved
%%% (apparently by a column-wise text extraction); they are restored
%%% here to readable order using exactly the words that were present.
@InProceedings{El-Ghazawi:2002:UPP,
  author =       "Tarek El-Ghazawi and Fran{\c{c}}ois Cantonnet",
  title =        "{UPC} Performance and Potential: a {NPB} Experimental
                 Study",
  crossref =     "IEEE:2002:STI",
  pages =        "??--??",
  year =         "2002",
  bibdate =      "Wed Nov 26 07:34:20 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sc-2002.org/paperpdfs/pap.pap316.pdf",
  abstract =     "UPC, or Unified Parallel C, is a parallel extension of
                 ANSI C. UPC follows a distributed shared memory
                 programming model aimed at leveraging the ease of
                 programming of the shared memory paradigm, while
                 enabling the exploitation of data locality. UPC
                 incorporates constructs that allow placing data near
                 the threads that manipulate them to minimize remote
                 accesses. This paper gives an overview of the concepts
                 and features of UPC and establishes, through extensive
                 performance measurements of NPB workloads, the
                 viability of the UPC programming language compared to
                 the other popular paradigms. Further, through
                 performance measurements we identify the challenges,
                 the remaining steps and the priorities for UPC. It will
                 be shown that with proper hand tuning and optimized
                 collective operations libraries, UPC performance will
                 be comparable to that of MPI. Furthermore, by
                 incorporating such improvements into automatic compiler
                 optimizations, UPC will compare quite favorably to
                 message passing in ease of programming.",
  acknowledgement = ack-nhfb,
  keywords =     "NPB (NAS Parallel Benchmark)",
}
@Article{Espenica:2002:PPA,
  author          = {Roberto Espenica and Pedro Medeiros},
  title           = {Porting {PVM} to the {VIA} Architecture Using a
                     Fast Communication Library},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {341--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740341.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740341.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@TechReport{Fagg:2002:FTM,
  author          = {Graham E. Fagg and Antonin Bukovsky and Sathish
                     Vadhiyar and Jack J. Dongarra},
  title           = {Fault Tolerant {MPI} for the {HARNESS
                     MetaComputing} System},
  type            = {Technical report},
  number          = {????},
  institution     = inst-UTK,
  address         = inst-UTK:adr,
  pages           = {14},
  year            = {2002},
  bibdate         = {Tue Jan 13 18:41:26 2004},
  bibsource       = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.netlib.org/netlib/utk/people/JackDongarra/PAPERS/ft-mpi-iccs-gef.pdf},
  acknowledgement = ack-nhfb,
}
@TechReport{Fagg:2002:HFTa,
  author          = {Graham E. Fagg and Jack J. Dongarra},
  title           = {{HARNESS} Fault Tolerant {MPI} Design, Usage and
                     Performance Issues},
  type            = {Technical report},
  number          = {????},
  institution     = inst-UTK,
  address         = inst-UTK:adr,
  year            = {2002},
  bibdate         = {Tue Jan 13 18:42:49 2004},
  bibsource       = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.netlib.org/netlib/utk/people/JackDongarra/PAPERS/ft-mpi-fgcs-grid-se.pdf},
  acknowledgement = ack-nhfb,
}
@Article{Fagg:2002:HFTb,
  author          = {Graham E. Fagg and Jack J. Dongarra},
  title           = {{HARNESS} fault tolerant {MPI} design, usage and
                     performance issues},
  journal         = j-FUT-GEN-COMP-SYS,
  volume          = {18},
  number          = {8},
  pages           = {1127--1142},
  month           = oct,
  year            = {2002},
  CODEN           = {FGSEVI},
  ISSN            = {0167-739X (print), 1872-7115 (electronic)},
  ISSN-L          = {0167-739X},
  bibdate         = {Sat Jan 10 10:03:29 MST 2004},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Future Generation Computer Systems},
  journal-URL     = {http://www.sciencedirect.com/science/journal/0167739X},
}
@Article{Field:2002:OSR,
  author          = {A. J. Field and P. H. J. Kelly and T. L. Hansen},
  title           = {Optimising Shared Reduction Variables in {MPI}
                     Programs},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2400},
  pages           = {630--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Tue Sep 10 19:10:14 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2400.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2400/24000630.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2400/24000630.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Book{Garg:2002:TOA,
  author          = {Rajat P. Garg and Ilya Sharapov},
  title           = {Techniques for optimizing applications: high
                     performance computing},
  publisher       = pub-SUN-MICROSYSTEMS-PRESS,
  address         = pub-SUN-MICROSYSTEMS-PRESS:adr,
  pages           = {xliii + 616},
  year            = {2002},
  ISBN            = {0-13-093476-3},
  ISBN-13         = {978-0-13-093476-5},
  LCCN            = {QA76.88 .G37 2002},
  bibdate         = {Fri Apr 11 08:26:42 2003},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/unix.bib;
                     http://www.sun.com/blueprints/},
  series          = {Sun BluePrints Program},
  URL             = {http://www.sun.com/books/catalog/garg.html/index.html;
                     http://www.sun.com/solutions/blueprints/tools/},
  acknowledgement = ack-nhfb,
  annote          = {From the Web site: The \verb=HPC_code_examples.tar.Z=
                     tar-file contains the source code, makefiles, and
                     shell scripts required to compile, link, and run
                     the example programs discussed in the book.},
  keywords        = {Forte Developer; MPI; OpenMP; Sun ClusterTools; Sun
                     Solaris},
}
@Article{Gine:2002:ALT,
  author          = {Francesc Gin{\'e} and Francesc Solsona and Porfidio
                     Hern{\'a}ndez and Emilio Luque},
  title           = {Adjusting the Lengths of Time Slices when
                     Scheduling {PVM} Jobs with High Memory
                     Requirements},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {156--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740156.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740156.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Goedecker:2002:OPF,
  author          = {Stefan Goedecker},
  title           = {Optimization and parallelization of a force field
                     for silicon using {OpenMP}},
  journal         = j-COMP-PHYS-COMM,
  volume          = {148},
  number          = {1},
  pages           = {124--135},
  day             = {1},
  month           = oct,
  year            = {2002},
  CODEN           = {CPHCBZ},
  DOI             = {https://doi.org/10.1016/S0010-4655(02)00466-6},
  ISSN            = {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L          = {0010-4655},
  bibdate         = {Mon Feb 13 23:41:24 MST 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0010465502004666},
  acknowledgement = ack-nhfb,
  fjournal        = {Computer Physics Communications},
  journal-URL     = {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Gonzalez:2002:DLP,
  author          = {Marc Gonz{\'a}lez and Eduard Ayguad{\'e} and Xavier
                     Martorell and Jes{\'u}s Labarta and Phu V. Luong},
  title           = {Dual-Level Parallelism Exploitation with {OpenMP}
                     in Coastal Ocean Circulation Modeling},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2327},
  pages           = {469--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:32 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2002a.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270469.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2327/23270469.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gropp:2002:BLC,
  author          = {William Gropp},
  title           = {Building Library Components that Can Use Any {MPI}
                     Implementation},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {280--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740280.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740280.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gropp:2002:MG,
  author          = {William Gropp and Ewing Lusk},
  title           = {{MPI} on the {Grid}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {12--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740012.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740012.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Gropp:2002:MNS,
  author          = {William Gropp},
  title           = {{MPICH2}: a New Start for {MPI} Implementations},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {7--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740007.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740007.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Hadjidoukas:2002:MOI,
  author          = {Panagiotis E. Hadjidoukas and Eleftherios D.
                     Polychronopoulos and Theodore S. Papatheodorou},
  title           = {A Modular {OpenMP} Implementation for Clusters of
                     Multiprocessors},
  journal         = j-PARALLEL-DIST-COMP-PRACT,
  volume          = {5},
  number          = {2},
  pages           = {153--168},
  month           = jun,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1097-2803},
  bibdate         = {Thu Sep 2 12:08:56 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.scpe.org/content/5/2.toc},
  acknowledgement = ack-nhfb,
  fjournal        = {PDCP: Parallel and Distributed Computing Practices},
}
@Article{He:2002:MOP,
  author          = {Yun He and Chris H. Q. Ding},
  title           = {{MPI} and {OpenMP} Paradigms on Cluster of {SMP}
                     Architectures: The Vacancy Tracking Algorithm for
                     Multi-Dimensional Array Transposition},
  journal         = j-PARALLEL-DIST-COMP-PRACT,
  volume          = {5},
  number          = {2},
  pages           = {117--128},
  month           = jun,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1097-2803},
  bibdate         = {Thu Sep 2 12:08:56 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.scpe.org/content/5/2.toc},
  acknowledgement = ack-nhfb,
  fjournal        = {PDCP: Parallel and Distributed Computing Practices},
}
@Article{Heikonen:2002:ILB,
  author          = {Jussi Heikonen and Kalle Eerola},
  title           = {Improving Load Balance in a Weather Code:
                     Asynchronous Output in {HIRLAM} with {MPI}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2367},
  pages           = {567--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:54 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2367.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2367/23670567.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2367/23670567.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Huang:2002:DDD,
  author          = {Wei Huang and Zhe Wang and Jie Ma},
  title           = {Design of {DMPI} on {DAWNING-3000}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {314--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740314.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740314.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Huttunen:2002:MCC,
  author          = {Pentti Huttunen and Jouni Ikonen and Jari Porras},
  title           = {{MPIT} --- Communication\slash Computation Paradigm
                     for Networks of {SMP} Workstations},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2367},
  pages           = {160--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Thu Sep 12 08:36:35 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2367.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2367/23670160.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2367/23670160.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
%%% The source listed these authors family-name-first without commas,
%%% which BibTeX parses with given and family names swapped.  The
%%% names are given here in First Last order.  The citation label is
%%% left unchanged so that existing citations continue to resolve.
@Article{Isabel:2002:CMO,
  author =       "Isabel Dorta and Coromoto Le{\'o}n and Casiano
                 Rodr{\'\i}guez",
  title =        "Comparing {MPI} and {OpenMP} implementations of the
                 $0$-$1$ Knapsack Problem",
  journal =      j-PARALLEL-DIST-COMP-PRACT,
  volume =       "5",
  number =       "2",
  pages =        "129--137",
  month =        jun,
  year =         "2002",
  CODEN =        "????",
  ISSN =         "1097-2803",
  bibdate =      "Thu Sep 2 12:08:56 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.scpe.org/content/5/2.toc",
  acknowledgement = ack-nhfb,
  fjournal =     "PDCP: Parallel and Distributed Computing Practices",
}
@Article{Islam:2002:IAC,
  author          = {Mohammad Towhidul Islam and Parimala Thulasiraman
                     and Ruppa K. Thulasiram},
  title           = {Implementation of Ant Colony Optimization Algorithm
                     for Mobile Ad hoc Network Applications: {OpenMP}
                     Experiences},
  journal         = j-PARALLEL-DIST-COMP-PRACT,
  volume          = {5},
  number          = {2},
  pages           = {177--191},
  month           = jun,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1097-2803},
  bibdate         = {Thu Sep 2 12:08:56 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.scpe.org/content/5/2.toc},
  acknowledgement = ack-nhfb,
  fjournal        = {PDCP: Parallel and Distributed Computing Practices},
}
@Article{Iwama:2002:PLS,
  author          = {Kazuo Iwama and Daisuke Kawai and Shuichi Miyazaki
                     and Yasuo Okabe and Jun Umemoto},
  title           = {Parallelizing local search for {CNF} satisfiability
                     using vectorization and {PVM}},
  journal         = j-ACM-J-EXP-ALGORITHMICS,
  volume          = {7},
  pages           = {2--2},
  month           = {????},
  year            = {2002},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/944618.944620},
  ISSN            = {1084-6654},
  bibdate         = {Mon Oct 6 16:04:20 MDT 2008},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract        = {The purpose of this paper is to speed up the local
                     search algorithm for the CNF Satisfiability
                     problem. Our basic strategy is to run some 10$^5$
                     independent search paths simultaneously using PVM
                     on a vector supercomputer VPP800, which consists of
                     40 vector processors. Using the above
                     parallelization and vectorization together with
                     some improvement of data structure, we obtained
                     600-times speedup in terms of the number of flips
                     the local search can make per second, compared to
                     the original GSAT by Selman and Kautz. We ran our
                     parallel GSAT for benchmark instances and compared
                     the running time with those of existing SAT
                     programs. We could observe an apparent benefit of
                     parallelization: Especially, we were able to solve
                     two instances that have never been solved before
                     this paper. We also tested parallel local search
                     for the SAT encoding of the class scheduling
                     problem. Again we were able to get almost the best
                     answer in reasonable time.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM Journal of Experimental Algorithmics},
  keywords        = {algorithms; CNF Satisfiability; distributed
                     computing; experimentation; local search
                     algorithms; parallelization; PVM; vector
                     supercomputer; vectorization},
}
@Article{Kabir:2002:DIS,
  author          = {Yacine Kabir and A. Belhadj-Aissa},
  title           = {Distributed Image Segmentation System by a
                     Multi-agents Approach (Under {PVM} Environment)},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {138--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740138.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740138.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
%%% The second author's middle name is written without a trailing
%%% period, matching the spelling used for the same person elsewhere
%%% in this file.  In the abstract, the run-together word
%%% prefinement is restored to $p$-refinement.
@InProceedings{Karniadakis:2002:DLP,
  author =       "Suchuan Dong and George Em Karniadakis",
  title =        "Dual-Level Parallelism for Deterministic and
                 Stochastic {CFD} Problems",
  crossref =     "IEEE:2002:STI",
  pages =        "??--??",
  year =         "2002",
  bibdate =      "Wed Nov 26 07:34:20 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/supercomputing2002.bib",
  URL =          "http://www.sc-2002.org/paperpdfs/pap.pap137.pdf",
  abstract =     "A hybrid two-level parallelism using MPI/OpenMP is
                 implemented in the general-purpose spectral/hp element
                 CFD code NekTar to take advantage of the hierarchical
                 structures arising in deterministic and stochastic CFD
                 problems. We take a coarse grain approach to
                 shared-memory parallelism with OpenMP and employ a
                 workload-splitting scheme that can reduce the OpenMP
                 synchronizations to the minimum. The hybrid
                 implementation shows good scalability with respect to
                 both the problem size and the number of processors in
                 case of a fixed problem size. With the same number of
                 processors, the hybrid model with 2 (or 4) OpenMP
                 threads per MPI process is observed to perform better
                 than pure MPI and pure OpenMP on the NCSA SGI Origin
                 2000, while the pure MPI model performs the best on the
                 IBM SP3 at SDSC and on the Compaq Alpha cluster at PSC.
                 A key new result is that the use of threads facilitates
                 effectively $p$-refinement, which is crucial to
                 adaptive discretization using high-order methods.",
  acknowledgement = ack-nhfb,
}
@Book{Karniadakis:2002:PSC,
  author          = {George Em Karniadakis and Robert M. Kirby},
  title           = {Parallel Scientific Computing in {C++} and {MPI}: a
                     Seamless Approach to Parallel Algorithms},
  publisher       = pub-CAMBRIDGE,
  address         = pub-CAMBRIDGE:adr,
  pages           = {xi + 616},
  year            = {2002},
  ISBN            = {0-521-52080-0 (paperback), 0-521-81754-4 (hardcover)},
  ISBN-13         = {978-0-521-52080-5 (paperback), 978-0-521-81754-7
                     (hardcover)},
  LCCN            = {QA76.58 .K37 2003},
  bibdate         = {Wed Aug 27 06:43:56 2003},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     z3950.loc.gov:7090/Voyager},
  price           = {US\$50.00 (paperback), US\$130.00 (hardcover)},
  URL             = {ftp://uiarchive.cso.uiuc.edu/pub/etext/gutenberg/;
                     http://www.loc.gov/catdir/description/cam031/2002034805.html;
                     http://www.loc.gov/catdir/samples/cam033/2002034805.html;
                     http://www.loc.gov/catdir/toc/cam031/2002034805.html},
  acknowledgement = ack-nhfb,
  subject         = {Parallel processing (Electronic computers); C++
                     (Computer program language); Data transmission
                     systems},
}
@Article{Kasprzyk:2002:APV,
  author          = {Leszek Kasprzyk and Ryszard Nawrowski and Andrzej
                     Tomczewski},
  title           = {Application of a Parallel Virtual Machine for the
                     Analysis of a Luminous Field},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {122--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740122.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740122.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Keppens:2002:OPM,
  author          = {R. Keppens and G. T{\'o}th},
  title           = {{OpenMP} Parallelism for Multi-dimensional
                     Grid-Adaptive Magnetohydrodynamic Simulations},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2329},
  pages           = {940--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:34 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2329.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2002a.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2329/23290940.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2329/23290940.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Krawezik:2002:SOV,
  author          = {G{\'e}raud Krawezik and Guillaume All{\'e}on and
                     Franck Cappello},
  title           = {{SPMD OpenMP} versus {MPI} on a {IBM SMP} for 3
                     Kernels of the {NAS} Benchmarks},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2327},
  pages           = {425--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:32 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2002a.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270425.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2327/23270425.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Krysztop:2002:IFP,
  author          = {Bartosz Krysztop and Henryk Krawczyk},
  title           = {Improving Flexibility and Performance of {PVM}
                     Applications by Distributed Partial Evaluation},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {376--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740376.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740376.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Langlais:2002:SSM,
  author          = {M. Langlais and G. Latu and J. Roman and P. Silan},
  title           = {Stochastic Simulation of a Marine Host-Parasite
                     System Using a Hybrid {MPI\slash OpenMP}
                     Programming},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2400},
  pages           = {436--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Tue Sep 10 19:10:14 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2400.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2002c.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2400/24000436.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2400/24000436.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Lazzarino:2002:PBP,
  author          = {Oscar Lazzarino and Andrea Sanna and Claudio Zunino
                     and Fabrizio Lamberti},
  title           = {A {PVM}-Based Parallel Implementation of the
                     {REYES} Image Rendering Architecture},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {165--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740165.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740165.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Lee:2002:IPC,
  author          = {Nung Kion Lee and David Taniar and J. Wenny Rahayu
                     and Mafruz Zaman Ashrafi},
  title           = {Implementation of Parallel Collection Equi-Join
                     Using {MPI}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2367},
  pages           = {217--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:54 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2367.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer-ny.com/link/service/series/0558/bibs/2367/23670217.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2367/23670217.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@Article{Lopez:2002:ESM,
  author          = {F{\'e}lix C{\'e}sar Garc{\'\i}a L{\'o}pez and
                     Nieves Luz Fr{\'\i}as Arrocha},
  title           = {Expanding the Synchronization Model for {OpenMP}},
  journal         = j-PARALLEL-DIST-COMP-PRACT,
  volume          = {5},
  number          = {2},
  pages           = {169--175},
  month           = jun,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1097-2803},
  bibdate         = {Thu Sep 2 12:08:56 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.scpe.org/content/5/2.toc},
  acknowledgement = ack-nhfb,
  fjournal        = {PDCP: Parallel and Distributed Computing Practices},
}
@Article{Luecke:2002:DDM,
  author          = {Glenn R. Luecke and Yan Zou and James Coyle and Jim
                     Hoekstra and Marina Kraeva},
  title           = {Deadlock detection in {MPI} programs},
  journal         = j-CCPE,
  volume          = {14},
  number          = {11},
  pages           = {911--932},
  day             = {25},
  month           = aug,
  year            = {2002},
  CODEN           = {CCPEBO},
  DOI             = {https://doi.org/10.1002/cpe.701},
  ISSN            = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L          = {1532-0626},
  bibdate         = {Sat Nov 9 12:24:19 MST 2002},
  bibsource       = {http://www.interscience.wiley.com/jpages/1532-0626;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www3.interscience.wiley.com/journalfinder.html},
  URL             = {http://www3.interscience.wiley.com/cgi-bin/abstract/97519209/START;
                     http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=97519209{\&}PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Prac\-tice and
                     Experience},
  journal-URL     = {http://www.interscience.wiley.com/jpages/1532-0626},
}
@Article{Macias:2002:SEA,
  author          = {Elsa M. Mac{\'\i}as and Alvaro Su{\'a}rez},
  title           = {Solving Engineering Applications with {LAMGAC} over
                     {MPI-2}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {130--??},
  year            = {2002},
  CODEN           = {LNCSD9},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740130.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740130.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
%%% The full journal name is recorded in `fjournal', as is done for
%%% the other journal articles in this file.
@Article{Mahinthakumar:2002:HMO,
  author =       "G. Mahinthakumar and F. Saied",
  title =        "A Hybrid {MPI-OpenMP} Implementation of an Implicit
                 Finite-Element Code on Parallel Architectures",
  journal =      j-IJHPCA,
  volume =       "16",
  number =       "4",
  pages =        "371--393",
  month =        "Winter",
  year =         "2002",
  CODEN =        "IHPCFL",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Fri Nov 28 06:52:13 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The International Journal of High Performance
                 Computing Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}
@Article{Marcos:2002:DDP,
  author          = {Carlos de la Fuente Marcos and Pierre Barge and
                     Ra{\'u}l de la Fuente Marcos},
  title           = {Dust Dynamics in Protoplanetary Disks: Parallel
                     Computing with {PVM}},
  journal         = j-J-COMPUT-PHYS,
  volume          = {176},
  number          = {2},
  pages           = {276--294},
  day             = {1},
  month           = mar,
  year            = {2002},
  CODEN           = {JCTPAH},
  DOI             = {https://doi.org/10.1006/jcph.2001.6978},
  ISSN            = {0021-9991 (print), 1090-2716 (electronic)},
  ISSN-L          = {0021-9991},
  bibdate         = {Mon Jan 2 22:12:14 MST 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/jcomputphys2000.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0021999101969785},
  acknowledgement = ack-nhfb,
  fjournal        = {Journal of Computational Physics},
  journal-URL     = {http://www.sciencedirect.com/science/journal/00219991},
}
@Article{Marowka:2002:ISI,
  author          = {Ami Marowka},
  title           = {Introduction to the special issue: {OpenMP}:
                     Experiences, Implementations and Applications},
  journal         = j-PARALLEL-DIST-COMP-PRACT,
  volume          = {5},
  number          = {2},
  pages           = {v--v},
  month           = jun,
  year            = {2002},
  CODEN           = {????},
  ISSN            = {1097-2803},
  bibdate         = {Thu Sep 2 12:08:56 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.scpe.org/content/5/2.toc},
  acknowledgement = ack-nhfb,
  fjournal        = {PDCP: Parallel and Distributed Computing Practices},
}
@article{Michailidis:2002:PSL,
  author          = {Michailidis, Panagiotis D. and Margaritis, Konstantinos G.},
  title           = {A Performance Study of Load Balancing Strategies for Approximate
                     String Matching on an {MPI} Heterogeneous System Environment},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {432--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740432.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740432.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Mohr:2002:DPP,
  author          = {Mohr, Bernd and Malony, Allen D. and Shende, Sameer and Wolf, Felix},
  title           = {Design and Prototype of a Performance Tool Interface for {OpenMP}},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {23},
  number          = {1},
  pages           = {105--128},
  month           = aug,
  year            = {2002},
  coden           = {JOSUED},
  issn            = {0920-8542 (print), 1573-0484 (electronic)},
  issn-l          = {0920-8542},
  bibdate         = {Wed Jan 14 07:25:20 MST 2004},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.wkap.nl/journalhome.htm/0920-8542},
  url             = {http://ipsapp008.kluweronline.com/content/getfile/5189/37/8/abstract.htm;
                     http://ipsapp008.kluweronline.com/content/getfile/5189/37/8/fulltext.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-url     = {http://link.springer.com/journal/11227},
}
@article{Muller:2002:SMB,
  author          = {M{\"u}ller, Matthias S.},
  title           = {A Shared Memory Benchmark in {OpenMP}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2327},
  pages           = {380--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:32 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2002a.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270380.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2327/23270380.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Nakajima:2002:PISa,
  author          = {Nakajima, Kengo and Okuda, Hiroshi},
  title           = {Parallel Iterative Solvers for Unstructured Grids Using an
                     {OpenMP\slash MPI} Hybrid Programming Model for the {GeoFEM}
                     Platform on {SMP} Cluster Architectures},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2327},
  pages           = {437--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:32 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270437.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2327/23270437.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Nakajima:2002:PISb,
  author          = {Nakajima, Kengo and Okuda, Hiroshi},
  title           = {Parallel iterative solvers for unstructured grids using a
                     directive\slash {MPI} hybrid programming model for the {GeoFEM}
                     platform on {SMP} cluster architectures},
  journal         = j-CCPE,
  volume          = {14},
  number          = {6--7},
  pages           = {411--429},
  month           = may # "\slash " # jun,
  year            = {2002},
  coden           = {CCPEBO},
  doi             = {https://doi.org/10.1002/cpe.622},
  issn            = {1532-0626 (print), 1532-0634 (electronic)},
  issn-l          = {1532-0626},
  bibdate         = {Sat Nov 9 12:24:19 MST 2002},
  bibsource       = {http://www.interscience.wiley.com/jpages/1532-0626;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www3.interscience.wiley.com/journalfinder.html},
  url             = {http://www3.interscience.wiley.com/cgi-bin/abstract/94515747/START;
                     http://www3.interscience.wiley.com/cgi-bin/fulltext?ID=94515747{\&}PLACEBO=IE.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Prac\-tice and Experience},
  journal-url     = {http://www.interscience.wiley.com/jpages/1532-0626},
}
@article{Nakano:2002:SCG,
  author          = {Nakano, Hirofumi and Ishizaka, Kazuhisa and Obata, Motoki and
                     Kimura, Keiji and Kasahara, Hironori},
  title           = {Static Coarse Grain Task Scheduling with Cache Optimization Using
                     {OpenMP}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2327},
  pages           = {479--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:32 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2002a.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270479.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2327/23270479.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Norden:2002:OVM,
  author          = {Nord{\'e}n, M. and Holmgren, S. and Thun{\'e}, M.},
  title           = {{OpenMP} versus {MPI} for {PDE} Solvers Based on Regular Sparse
                     Numerical Operators},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2331},
  pages           = {681--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:36 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2331.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2002b.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer-ny.com/link/service/series/0558/bibs/2331/23310681.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2331/23310681.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Ong:2002:MRS,
  author          = {Ong, Emil},
  title           = {{MPI Ruby}: Scripting in a Parallel Environment},
  journal         = j-COMPUT-SCI-ENG,
  volume          = {4},
  number          = {4},
  pages           = {78--82},
  month           = jul # "\slash " # aug,
  year            = {2002},
  coden           = {CSENFA},
  issn            = {1521-9615 (print), 1558-366X (electronic)},
  issn-l          = {1521-9615},
  bibdate         = {Sat Jan 3 18:25:00 MST 2004},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://csdl.computer.org/comp/mags/cs/2002/04/c4078abs.htm;
                     http://csdl.computer.org/dl/mags/cs/2002/04/c4078.htm;
                     http://csdl.computer.org/dl/mags/cs/2002/04/c4078.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Computing in Science and Engineering},
  journal-url     = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992},
}
@inproceedings{Phillips:2002:NBS,
  author          = {Phillips, James C. and Zheng, Gengbin and Kumar, Sameer and
                     Kal{\'e}, Laxmikant V.},
  title           = {{NAMD}: Biomolecular Simulation on Thousands of Processors},
  crossref        = {IEEE:2002:STI},
  pages           = {??--??},
  year            = {2002},
  bibdate         = {Wed Nov 26 07:34:20 2003},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://www.sc-2002.org/paperpdfs/pap.pap277.pdf},
  abstract        = {NAMD is a fully featured, production molecular dynamics program
                     for high performance simulation of large biomolecular systems. We
                     have previously, at SC2000, presented scaling results for
                     simulations with cutoff electrostatics on up to 2048 processors
                     of the ASCI Red machine, achieved with an object-based hybrid
                     force and spatial decomposition scheme and an aggressive
                     measurement-based predictive load balancing framework. We extend
                     this work by demonstrating similar scaling on the much faster
                     processors of the PSC Lemieux Alpha cluster, and for simulations
                     employing efficient (order N log N) particle mesh Ewald full
                     electrostatics. This unprecedented scalability in a biomolecular
                     simulation code has been attained through latency tolerance,
                     adaptation to multiprocessor nodes, and the direct use of the
                     Quadrics Elan library in place of MPI by the Charm++/Converse
                     parallel runtime system.},
  acknowledgement = ack-nhfb,
}
@article{Piriyakumar:2002:EFI,
  author          = {Piriyakumar, Douglas Antony Louis and Levi, Paul and
                     Rabenseifner, Rolf},
  title           = {Enhanced File Interoperability with Parallel {MPI} File-{I/O} in
                     Image Processing},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {174--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740174.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740174.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Plachetka:2002:QTS,
  author          = {Plachetka, Tomas},
  title           = {(Quasi-) Thread-Safe {PVM} and (Quasi-) Thread-Safe {MPI} without
                     Active Polling},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {296--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740296.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740296.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Prabhakar:2002:PCB,
  author          = {Prabhakar, Achal and Getov, Vladimir and Chapman, Barbara},
  title           = {Performance Comparisons of Basic {OpenMP} Constructs},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2327},
  pages           = {413--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:32 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2002a.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270413.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2327/23270413.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@InProceedings{Rauber:2002:LSH,
  author =       "Thomas Rauber and Gudula R{\"u}nger",
  title =        "Library Support for Hierarchical Multi-Processor
                 Tasks",
  crossref =     "IEEE:2002:STI",
  pages =        "??--??",
  year =         "2002",
  bibdate =      "Wed Nov 26 07:34:20 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sc-2002.org/paperpdfs/pap.pap176.pdf",
  abstract =     "The paper considers the modular programming with
                 hierarchically structured multi-processor tasks on top
                 of SPMD tasks for distributed memory machines. The
                 parallel execution requires a corresponding
                 decomposition of the set of processors into a
                 hierarchical group structure onto which the tasks are
                 mapped. This results in a multi-level group SPMD
                 computation model with varying processor group
                 structures. The advantage of this kind of mixed task
                 and data parallelism is a potential to reduce the
                 communication overhead and to increase scalability. We
                 present a runtime library to support the coordination
                 of hierarchically structured multi-processor tasks. The
                 library exploits an extended parallel group SPMD
                 programming model and manages the entire task execution
                 including the dynamic hierarchy of processor groups.
                 The library is built on top of MPI, has an easy-to-use
                 interface, and leads to only a marginal overhead while
                 allowing static planning and dynamic restructuring.",
  acknowledgement = ack-nhfb,
  keywords =     "mixed task and data parallelism; multiprocessor tasks;
                 multilevel group SPMD; hierarchical decomposition of
                 processor sets; library support; distributed memory",
}
@article{Reussner:2002:SCB,
  author          = {Reussner, Ralf and Sanders, Peter and Tr{\"a}ff, Jesper Larsson},
  title           = {{SKaMPI}: a comprehensive benchmark for public benchmarking of
                     {MPI}},
  journal         = j-SCI-PROG,
  volume          = {10},
  number          = {1},
  pages           = {55--65},
  year            = {2002},
  coden           = {SCIPEV},
  issn            = {1058-9244 (print), 1875-919X (electronic)},
  issn-l          = {1058-9244},
  bibdate         = {Sat Oct 26 14:52:27 MDT 2002},
  bibsource       = {http://www.iospress.nl/site/html/10589244.html;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://iospress.metapress.com/app/home/contribution.asp%3Fwasp=9ejnuvwuvby9737jte27%26referrer=parent%26backto=issue%2C6%2C9%3Bjournal%2C2%2C12%3Blinkingpublicationresults%2C1%2C1},
  acknowledgement = ack-nhfb,
  fjournal        = {Scientific Programming},
  journal-url     = {http://iospress.metapress.com/content/1058-9244},
}
@article{Sack:2002:FMB,
  author          = {Sack, Paul and Elster, Anne C.},
  title           = {Fast {MPI} Broadcasts through Reliable Multicasting},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2367},
  pages           = {445--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:54 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2367.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer-ny.com/link/service/series/0558/bibs/2367/23670445.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2367/23670445.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Selikhov:2002:MCC,
  author          = {Selikhov, Anton and Bosilca, George and Germain, Cecile and
                     Fedak, Gilles and Cappello, Franck},
  title           = {{MPICH-CM}: a Communication Library Design for a {P2P MPI}
                     Implementation},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {323--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740323.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740323.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Shires:2002:EHM,
  author          = {Shires, D. and Mohan, R.},
  title           = {An Evaluation of {HPF} and {MPI} Approaches and Performance in
                     Unstructured Finite Element Simulations},
  journal         = {Journal of Mathematical Modelling and Algorithms},
  volume          = {1},
  number          = {3},
  publisher       = {Kluwer Academic Publishers, Dordrecht, The Netherlands},
  pages           = {153--167},
  year            = {2002},
  coden           = {????},
  issn            = {1570-1166},
  bibdate         = {Sat Dec 7 09:42:43 MST 2002},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib; Ingenta database},
  acknowledgement = ack-nhfb,
  pagecount       = {15},
}
@InProceedings{Sistare:2002:UHP,
  author =       "Steven J. Sistare and Christopher J. Jackson",
  title =        "Ultra-High Performance Communication with {MPI} and
                 the {Sun Fire\TM{}} Link Interconnect",
  crossref =     "IEEE:2002:STI",
  pages =        "??--??",
  year =         "2002",
  bibdate =      "Wed Nov 26 07:34:20 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sc-2002.org/paperpdfs/pap.pap142.pdf",
  abstract =     "We present a new low-latency system area network that
                 provides the ultra-high bandwidth needed to fuse a
                 collection of large SMP servers into a capability
                 cluster. The network adapter exports a remote shared
                 memory (RSM) model that supports low latency kernel
                 bypass messaging. The Sun\TM{} MPI library uses the RSM
                 interface to implement a highly efficient
                 memory-to-memory messaging protocol in which the
                 library directly manages buffers and data structures in
                 remote memory. This allows flexible allocation of
                 buffer space to active connections, while avoiding
                 resource contention that could otherwise increase
                 latencies. We discuss the characteristics of the
                 interconnect, describe the MPI protocols, and measure
                 the performance of a number of MPI benchmarks. Our
                 results include MPI inter-node bandwidths of almost 3
                 Gigabytes per second and MPI ping-pong latencies as low
                 as 3.7 microseconds.",
  acknowledgement = ack-nhfb,
  keywords =     "interconnects; kernel bypass; MPI; performance
                 evaluation; remote shared memory; SAN",
}
@Article{Smyk:2002:AMM,
  author =       "Adam Smyk and Marek Tudruj",
  title =        "Application of Mixed {{\em MPI\slash OpenMP\/}} Programming
                 in a Multi {SMP} Cluster Computer",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2328",
  pages =        "288--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Thu Sep 12 08:34:49 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2328.htm;
                 http://www.math.utah.edu/pub/tex/bib/lncs2002a.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2328/23280288.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2328/23280288.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Smyk:2002:OMP,
  author =       "Adam Smyk and Marek Tudruj",
  title =        "{{\em OpenMP\/}}\slash {{\em MPI\/}} Programming in a
                 Multi-cluster System Based on Shared Memory\slash
                 Message Passing Communication",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "2326",
  pages =        "241--??",
  year =         "2002",
  CODEN =        "LNCSD9",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Tue Sep 10 19:09:32 MDT 2002",
  bibsource =    "http://link.springer-ny.com/link/service/series/0558/tocs/t2326.htm;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer-ny.com/link/service/series/0558/bibs/2326/23260241.htm;
                 http://link.springer-ny.com/link/service/series/0558/papers/2326/23260241.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Stpiczynski:2002:PPO,
  author =       "Przemys{\l}aw Stpiczy{\'n}ski",
  title =        "{Parallel Programming in OpenMP} Helps Novices: a
                 review of {Parallel Programming in OpenMP} by {Rohit
                 Chandra}, {Leonardo Dagum}, {Dave Kohr}, {Dror Maydan},
                 {Jeff McDonald}, and {Ramesh Menon}",
  journal =      j-IEEE-DISTRIB-SYST-ONLINE,
  volume =       "3",
  number =       "8",
  year =         "2002",
  ISSN =         "1541-4922 (print), 1558-1683 (electronic)",
  ISSN-L =       "1541-4922",
  bibdate =      "Wed Oct 23 17:47:56 2002",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://dsonline.computer.org/0208/d/bks_a.htm",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Distributed Systems Online",
}
@article{Takahashi:2002:PEH,
  author          = {Takahashi, Daisuke and Sato, Mitsuhisa and Boku, Taisuke},
  title           = {Performance Evaluation of the {Hitachi SR8000} Using {OpenMP}
                     Benchmarks},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2327},
  pages           = {390--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:32 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm;
                     http://www.math.utah.edu/pub/tex/bib/lncs2002a.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270390.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2327/23270390.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Thakur:2002:ONA,
  author          = {Thakur, Rajeev and Gropp, William and Lusk, Ewing},
  title           = {Optimizing noncontiguous accesses in {MPI-IO}},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {28},
  number          = {1},
  pages           = {83--105},
  month           = jan,
  year            = {2002},
  coden           = {PACOEJ},
  issn            = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l          = {0167-8191},
  bibdate         = {Fri Feb 22 16:52:43 MST 2002},
  bibsource       = {http://www.elsevier.com/locate/issn/01678191;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://www.elsevier.com/gej-ng/10/35/21/60/27/32/abstract.html;
                     http://www.elsevier.nl/gej-ng/10/35/21/60/27/32/00001686.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-url     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Tian:2002:IOC,
  author =       "Xinmin Tian and Aart Bik and Milind Girkar and Paul
                 Grey and Hideki Saito and Ernesto Su",
  title =        "{Intel\reg{}} {OpenMP C++\slash Fortran} Compiler for
                 Hyper-Threading Technology: Implementation and
                 Performance",
  journal =      j-INTEL-TECH-J,
  volume =       "6",
  number =       "1",
  pages =        "36--46",
  month =        feb,
  year =         "2002",
  ISSN =         "1535-766X",
  bibdate =      "Thu Feb 28 15:24:21 2002",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/intel-tech-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://developer.intel.com/technology/itj/2002/volume06issue01/vol6iss1_hyper_threading_technology.pdf",
  acknowledgement = ack-nhfb,
}
@article{Traff:2002:IMA,
  author          = {Tr{\"a}ff, Jesper Larsson},
  title           = {Improved {MPI} All-to-all Communication on a {Giganet SMP}
                     Cluster},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {392--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740392.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740392.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@InProceedings{Traff:2002:IMP,
  author =       "Jesper Larsson Tr{\"a}ff",
  title =        "Implementing the {MPI} Process Topology Mechanism",
  crossref =     "IEEE:2002:STI",
  pages =        "??--??",
  year =         "2002",
  bibdate =      "Wed Nov 26 07:34:20 2003",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sc-2002.org/paperpdfs/pap.pap122.pdf",
  abstract =     "The topology functionality of the Message Passing
                 Interface (MPI) provides a portable,
                 architecture-independent means for adapting application
                 programs to the communication architecture of the
                 target hardware. However, current MPI implementations
                 rarely go beyond the most trivial implementation, and
                 simply performs no process remapping. We discuss the
                 potential of the topology mechanism for systems with a
                 hierarchical communication architecture like clusters
                 of SMP nodes. The MPI topology functionality is a weak
                 mechanism, and we argue about some of its shortcomings.
                 We formulate the topology optimization problem as a
                 graph embedding problem, and show that for hierarchical
                 systems it can be solved by graph partitioning. We
                 state the properties of a new heuristic for solving
                 both the embedding problem and the ``easier'' graph
                 partitioning problem. The graph partitioning based
                 framework has been fully implemented in MPI/SX for the
                 NEC SX-series of parallel vector computers. MPI/SX is
                 thus one of very few MPI implementations with a
                 non-trivial topology functionality. On a 4 node NEC
                 SX-6 significant communication performance improvements
                 are achieved with synthetic MPI benchmarks.",
  acknowledgement = ack-nhfb,
}
@article{Truong:2002:PAM,
  author          = {Truong, Hong-Linh and Fahringer, Thomas and Geissler, Michael and
                     Madsen, Georg},
  title           = {Performance Analysis for {MPI} Applications with {SCALEA}},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {421--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740421.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740421.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Uehara:2002:MBP,
  author          = {Uehara, Hitoshi and Tamura, Masanori and Yokokawa, Mitsuo},
  title           = {An {MPI} Benchmark Program Library and Its Application to the
                     {Earth} Simulator},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2327},
  pages           = {219--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Tue Sep 10 19:09:32 MDT 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2327.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer-ny.com/link/service/series/0558/bibs/2327/23270219.htm;
                     http://link.springer-ny.com/link/service/series/0558/papers/2327/23270219.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@InProceedings{Vadhiyar:2002:PMS,
  author =       "Sathish S. Vadhiyar and Graham E. Fagg and Jack J.
                 Dongarra",
  title =        "Performance Modeling for Self Adapting Collective
                 Communications for {MPI}",
  crossref =     "Oldehoeft:2002:SIS",
  pages =        "??--??",
  year =         "2002",
  bibdate =      "Tue Feb 26 06:44:44 2002",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.netlib.org/utk/people/JackDongarra/PAPERS/coll-lacsi-2001.pdf",
  acknowledgement = ack-nhfb,
  keywords =     "Los Alamos Computer Science Institute (LACSI)",
  xxbooktitle =  "LACSI Symposium 2001, October 15--18, Eldorado Hotel,
                 Santa Fe, NM",
}
@article{Vetter:2002:DSP,
  author          = {Vetter, Jeffrey},
  title           = {Dynamic statistical profiling of communication activity in
                     distributed applications},
  journal         = j-SIGMETRICS,
  volume          = {30},
  number          = {1},
  pages           = {240--250},
  month           = jun,
  year            = {2002},
  coden           = {????},
  doi             = {https://doi.org/10.1145/511334.511364},
  issn            = {0163-5999 (print), 1557-9484 (electronic)},
  issn-l          = {0163-5999},
  bibdate         = {Thu Jun 26 11:38:22 MDT 2008},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract        = {Performance analysis of communication activity for a terascale
                     application with traditional message tracing can be overwhelming
                     in terms of overhead, perturbation, and storage. We propose a
                     novel alternative that enables dynamic statistical profiling of
                     an application's communication activity using message sampling.
                     We have implemented an operational prototype, named PHOTON, and
                     our evidence shows that this new approach can provide an
                     accurate, low-overhead, tractable alternative for performance
                     analysis of communication activity. PHOTON consists of two
                     components: a Message Passing Interface (MPI) profiling layer
                     that implements sampling and analysis, and a modified MPI runtime
                     that appends a small but necessary amount of information to
                     individual messages. More importantly, this alternative enables
                     an assortment of runtime analysis techniques so that, in contrast
                     to post-mortem, trace-based techniques, the raw performance data
                     can be jettisoned immediately after analysis. Our investigation
                     shows that message sampling can reduce overhead to imperceptible
                     levels for many applications. Experiments on several applications
                     demonstrate the viability of this approach. For example, with one
                     application, our technique reduced the analysis overhead from
                     154\% for traditional tracing to 6\% for statistical profiling.
                     We also evaluate different sampling techniques in this framework.
                     The coverage of the sample space provided by purely random
                     sampling is superior to counter- and timer-based sampling. Also,
                     PHOTON's design reveals that frugal modifications to the MPI
                     runtime system could facilitate such techniques on production
                     computing systems, and it suggests that this sampling technique
                     could execute continuously for long-running applications.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGMETRICS Performance Evaluation Review},
  journal-url     = {http://portal.acm.org/toc.cfm?id=J618},
}
@inproceedings{Vetter:2002:EPE,
  author          = {Vetter, Jeffrey S. and Yoo, Andy},
  title           = {An Empirical Performance Evaluation of Scalable Scientific
                     Applications},
  crossref        = {IEEE:2002:STI},
  pages           = {??--??},
  year            = {2002},
  bibdate         = {Wed Nov 26 07:34:20 2003},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://www.sc-2002.org/paperpdfs/pap.pap222.pdf},
  abstract        = {We investigate the scalability, architectural requirements, and
                     performance characteristics of eight scalable scientific
                     applications. Our analysis is driven by empirical measurements
                     using statistical and tracing instrumentation for both
                     communication and computation. Based on these measurements, we
                     refine our analysis into precise explanations of the factors that
                     influence performance and scalability for each application; we
                     distill these factors into common traits and overall
                     recommendations for both users and designers of scalable
                     platforms. Our experiments demonstrate that some traits, such as
                     improvements in the scaling and performance of MPI's collective
                     operations, will benefit most applications. We also find specific
                     characteristics of some applications that limit performance. For
                     example, one application's intensive use of a 64-bit,
                     floating-point divide instruction, which has high latency and is
                     not pipelined on the POWER3, limits the performance of the
                     application's primary computation.},
  acknowledgement = ack-nhfb,
}
@article{Wallcraft:2002:CCA,
  author          = {Wallcraft, Alan J.},
  title           = {A Comparison of {Co-Array Fortran} and {OpenMP Fortran} for
                     {SPMD} Programming},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {22},
  number          = {3},
  pages           = {231--250},
  month           = jul,
  year            = {2002},
  coden           = {JOSUED},
  issn            = {0920-8542 (print), 1573-0484 (electronic)},
  issn-l          = {0920-8542},
  bibdate         = {Wed Jan 14 07:25:19 MST 2004},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.wkap.nl/journalhome.htm/0920-8542},
  url             = {http://ipsapp008.kluweronline.com/content/getfile/5189/36/1/abstract.htm;
                     http://ipsapp008.kluweronline.com/content/getfile/5189/36/1/fulltext.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-url     = {http://link.springer.com/journal/11227},
}
@article{Wang:2002:OPG,
  author          = {Wang, Ping},
  title           = {{OpenMP} programming for a global inverse model},
  journal         = j-SCI-PROG,
  volume          = {10},
  number          = {3},
  pages           = {253--261},
  year            = {2002},
  coden           = {SCIPEV},
  issn            = {1058-9244 (print), 1875-919X (electronic)},
  issn-l          = {1058-9244},
  bibdate         = {Sat Oct 26 15:08:19 2002},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/sciprogram.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Scientific Programming},
  journal-url     = {http://iospress.metapress.com/content/1058-9244},
}
@article{Worsch:2002:BCM,
  author          = {Worsch, Thomas and Reussner, Ralf and Augustin, Werner},
  title           = {On Benchmarking Collective {MPI} Operations},
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = {2474},
  pages           = {271--??},
  year            = {2002},
  coden           = {LNCSD9},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  issn-l          = {0302-9743},
  bibdate         = {Sat Nov 30 20:57:35 MST 2002},
  bibsource       = {http://link.springer-ny.com/link/service/series/0558/tocs/t2474.htm;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://link.springer.de/link/service/series/0558/bibs/2474/24740271.htm;
                     http://link.springer.de/link/service/series/0558/papers/2474/24740271.pdf},
  acknowledgement = ack-nhfb,
  fjournal        = {Lecture Notes in Computer Science},
}
@article{Addison:2003:OIA,
  author          = {Addison, C. and Ren, Y. and van Waveren, M.},
  title           = {{OpenMP} issues arising in the development of parallel {BLAS} and
                     {LAPACK} libraries},
  journal         = j-SCI-PROG,
  volume          = {11},
  number          = {2},
  pages           = {95--104},
  year            = {2003},
  coden           = {SCIPEV},
  issn            = {1058-9244 (print), 1875-919X (electronic)},
  issn-l          = {1058-9244},
  bibdate         = {Mon Jan 12 06:28:15 MST 2004},
  bibsource       = {http://www.iospress.nl/site/html/10589244.html;
                     http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/sciprogram.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Scientific Programming},
  journal-url     = {http://iospress.metapress.com/content/1058-9244},
}
@TechReport{Amestoy:2003:IIMa,
  author = {Patrick R. Amestoy and Iain S. Duff and Jean-Yves
    L'Excellent and Xiaoye S. Li},
  title = {Impact of the implementation of {MPI} point-to-point
    communications on the performance of two general sparse
    solvers},
  type = {Report},
  number = {TR/PA/03/14 and RR-4372 and LBNL-48968 and
    RT/APO/01/4},
  institution = inst-CERFACS,
  address = inst-CERFACS:adr,
  pages = {????},
  year = {2003},
  bibdate = {Tue Jan 03 06:25:11 2006},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/duff-iain-s.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Article{Amestoy:2003:IIMb,
  author = {Patrick R. Amestoy and Iain S. Duff and Jean-Yves
    L'Excellent and Xiaoye S. Li},
  title = {Impact of the implementation of {MPI} point-to-point
    communications on the performance of two general sparse
    solvers},
  journal = j-PARALLEL-COMPUTING,
  volume = {29},
  number = {7},
  pages = {833--849},
  month = jul,
  year = {2003},
  CODEN = {PACOEJ},
  ISSN = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L = {0167-8191},
  bibdate = {Wed Dec 24 09:07:26 MST 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Parallel Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Anonymous:2003:MNIc,
  author = {Anonymous},
  title = {Micro News: {IBM} ups the ante in silicon transistor
    speed; New benchmark suite based on high-performance
    computing applications, {MPI} and {OpenMP} [{SPEC
    HPC2002}]; {EU} {OKs} {Hitachi}, {Mitsubishi Electric}
    semiconductor joint venture; {Intel} launches {Pentium
    4} at {3.06 GHz}; {TSMC} unveils viable 25nm
    transistors},
  journal = j-IEEE-MICRO,
  volume = {23},
  number = {1},
  pages = {6--6, 87},
  month = jan # {\slash } # feb,
  year = {2003},
  CODEN = {IEMIDZ},
  ISSN = {0272-1732 (print), 1937-4143 (electronic)},
  ISSN-L = {0272-1732},
  bibdate = {Wed Apr 23 18:57:10 MDT 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://dlib.computer.org/mi/books/mi2003/pdf/m1006.pdf},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE Micro},
  journal-URL = {http://www.computer.org/csdl/mags/mi/index.html},
}
@Article{Barekas:2003:MAO,
  author = {Vasileios K. Barekas and Panagiotis E. Hadjidoukas and
    Eleftherios D. Polychronopoulos and others},
  title = {A Multiprogramming Aware {OpenMP} Implementation},
  journal = j-SCI-PROG,
  volume = {11},
  number = {2},
  pages = {133--141},
  year = {2003},
  CODEN = {SCIPEV},
  ISSN = {1058-9244 (print), 1875-919X (electronic)},
  ISSN-L = {1058-9244},
  bibdate = {Mon Jan 12 06:28:15 MST 2004},
  bibsource = {http://www.iospress.nl/site/html/10589244.html;
    http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sciprogram.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Scientific Programming},
  journal-URL = {http://iospress.metapress.com/content/1058-9244},
}
@InProceedings{Bouteiller:2003:MVF,
author = "Aurelien Bouteiller and Franck Cappello and Thomas
Herault and Geraud Krawezik and Pierre Lemarinier and
Frederic Magniette",
title = "{MPICH-V2}: a Fault Tolerant {MPI} for Volatile Nodes
based on Pessimistic Sender Based Message Logging",
crossref = "ACM:2003:SII",
pages = "??--??",
year = "2003",
bibdate = "Wed Nov 26 07:34:20 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10696#1;
http://www.sc-conference.org/sc2003/paperpdfs/pap209.pdf",
abstract = "Execution of MPI applications on clusters and Grid
deployments suffering from node and network failures
motivates the use of fault tolerant MPI
implementations. We present MPICH-V2 (the second
protocol of MPICH-V project), an automatic fault
tolerant MPI implementation using an innovative
protocol that removes the most limiting factor of the
pessimistic message logging approach: reliable logging
of in transit messages. MPICH-V2 relies on
uncoordinated checkpointing, sender based message
logging and remote reliable logging of message logical
clocks. This paper presents the architecture of
MPICH-V2, its theoretical foundation and the
performance of the implementation. We compare MPICH-V2
to MPICH-V1 and MPICH-P4 evaluating (a) its
point-to-point performance, (b) the performance for the
NAS benchmarks, (c) the application performance when
many faults occur during the execution. Experimental
results demonstrate that MPICH-V2 provides performance
close to MPICH-P4 for applications using large messages
while reducing dramatically the number of reliable
nodes compared to MPICH-V1.",
acknowledgement = ack-nhfb,
}
@Article{Brightwell:2003:DIP,
author = "Ron Brightwell and Rolf Riesen and Arthur B. Maccabe",
title = "Design, Implementation, and Performance of {MPI} on
{Portals 3.0}",
journal = j-IJHPCA,
volume = "17",
number = "1",
pages = "7--20",
month = "Spring",
year = "2003",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Nov 28 06:52:13 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "The International Journal of High Performance
Computing Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Briguglio:2003:PPM,
  author = {Sergio Briguglio and Beniamino {Di Martino} and
    Gregorio Vlad},
  title = {A performance-prediction model for {PIC} applications
    on clusters of Symmetric MultiProcessors: Validation
    with hierarchical {HPF $+$ OpenMP} implementation},
  journal = j-SCI-PROG,
  volume = {11},
  number = {2},
  pages = {159--176},
  year = {2003},
  CODEN = {SCIPEV},
  ISSN = {1058-9244 (print), 1875-919X (electronic)},
  ISSN-L = {1058-9244},
  bibdate = {Mon Jan 12 06:28:15 MST 2004},
  bibsource = {http://www.iospress.nl/site/html/10589244.html;
    http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
    http://www.math.utah.edu/pub/tex/bib/hpfortran.bib;
    http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sciprogram.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Scientific Programming},
  journal-URL = {http://iospress.metapress.com/content/1058-9244},
}
@Article{Bronevetsky:2003:AAL,
author = "Greg Bronevetsky and Daniel Marques and Keshav Pingali
and Paul Stodghill",
title = "Automated application-level checkpointing of {MPI}
programs",
journal = j-SIGPLAN,
volume = "38",
number = "10",
pages = "84--94",
month = oct,
year = "2003",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 22 16:52:42 MST 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Carson:2003:CGU,
  author = {Brett Carson and Robert Murison and Ian A. Mason},
  title = {Computational Gains Using {RPVM} on a {Beowulf}
    Cluster},
  journal = j-R-NEWS,
  volume = {3},
  number = {1},
  pages = {21--26},
  month = jun,
  year = {2003},
  CODEN = {????},
  ISSN = {1609-3631},
  ISSN-L = {1609-3631},
  bibdate = {Thu Aug 13 09:25:10 2015},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/rjournal.bib},
  URL = {http://CRAN.R-project.org/doc/Rnews/},
  acknowledgement = ack-r-project,
  fjournal = {R News: the Newsletter of the R Project},
  journal-URL = {http://journal.r-project.org/},
  pdf = Rnews2003-1,
}
@Article{Chen:2003:GMD,
author = "L. Chen and C.-L. Wang and F. C. M. Lau",
title = "A Grid Middleware for Distributed {Java} Computing
with {MPI} Binding and Process Migration Supports",
journal = j-J-COMP-SCI-TECH,
volume = "18",
number = "4",
pages = "505--514",
year = "2003",
CODEN = "JCTEEM",
ISSN = "1000-9000",
ISSN-L = "1000-9000",
bibdate = "Wed Aug 27 05:49:07 MDT 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib; Ingenta
database",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computer Science and Technology",
}
@InProceedings{Coll:2003:SHB,
  author = {Salvador Coll and Jose Duato and Fabrizio Petrini and
    Francisco J. Mora},
  title = {Scalable Hardware-Based Multicast Trees},
  crossref = {ACM:2003:SII},
  pages = {??--??},
  year = {2003},
  bibdate = {Wed Nov 26 07:34:20 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10702#2;
    http://www.sc-conference.org/sc2003/paperpdfs/pap300.pdf},
  abstract = {This paper presents an algorithm for implementing
    optimal hardware-based multicast trees, on networks
    that provide hardware support for collective
    communication. Although the proposed methodology can be
    generalized to a wide class of networks, we apply our
    methodology to the Quadrics network, a state-of-the-art
    network that provides hardware-based multicast
    communication. The proposed mechanism is intended to
    improve the performance of the collective communication
    patterns on the network, in those cases where the
    hardware support can not be directly used, for
    instance, due to some faulty nodes. This scheme
    provides significant reduction on multicast latencies
    compared to the original system primitives, which use
    multicast trees based on unicast communication. A
    backtracking algorithm to find the optimal solution to
    the problem is presented. In addition, a greedy
    algorithm is presented and shown to provide near
    optimal solutions. Finally, our experimental results
    show the good performance and scalability of the
    proposed multicast tree in comparison to the
    traditional unicast-based multicast trees. Our
    multicast mechanism doubles barrier synchronization and
    broadcasts performance when compared to the
    production-level MPI library.},
  acknowledgement = ack-nhfb,
}
@Article{Cooperman:2003:UTC,
  author = {Gene Cooperman and Henri Casanova and Jim Hayes and
    Thomas Witzel},
  title = {Using {TOP-C} and {AMPIC} to port large parallel
    applications to the {Computational Grid}},
  journal = j-FUT-GEN-COMP-SYS,
  volume = {19},
  number = {4},
  pages = {587--596},
  month = may,
  year = {2003},
  CODEN = {FGSEVI},
  ISSN = {0167-739X (print), 1872-7115 (electronic)},
  ISSN-L = {0167-739X},
  bibdate = {Sat Jan 10 10:03:33 MST 2004},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Future Generation Computer Systems},
  journal-URL = {http://www.sciencedirect.com/science/journal/0167739X},
  remark = {Selected papers from the IEEE/ACM International
    Symposium on Cluster Computing and the Grid,
    Berlin-Brandenburg Academy of Sciences and Humanities,
    Berlin, Germany, 21--24 May 2002.},
}
@Article{Czarnul:2003:PTA,
author = "Pawel Czarnul",
title = "Programming, Tuning and Automatic Parallelization of
Irregular Divide-and-Conquer Applications in
{DAMPVM\slash DAC}",
journal = j-IJHPCA,
volume = "17",
number = "1",
pages = "77--93",
month = "Spring",
year = "2003",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Nov 28 06:52:13 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "The International Journal of High Performance
Computing Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{DePasquale:2003:UJU,
author = "C. J. DePasquale",
title = "Using the {JVMPI} to Understand the Behavior of {Java}
Classes During the Development Process",
journal = "{CMG}",
volume = "2",
number = "??",
publisher = "Computer Measurement Group",
pages = "821--832",
year = "2003",
CODEN = "????",
bibdate = "Sat Apr 3 08:12:24 MST 2004",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib; Ingenta
database",
acknowledgement = ack-nhfb,
}
@InProceedings{Fernandez:2003:BMN,
author = "Juan Fernandez and Eitan Frachtenberg and Fabrizio
Petrini",
title = "{BCS-MPI}: a New Approach in the System Software
Design for Large-Scale Parallel Computers",
crossref = "ACM:2003:SII",
pages = "??--??",
year = "2003",
bibdate = "Wed Nov 26 07:34:20 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10716#1;
http://www.sc-conference.org/sc2003/paperpdfs/pap306.pdf",
abstract = "Buffered CoScheduled MPI (BCS-MPI) introduces a new
approach to design the communication layer for
large-scale parallel machines. The emphasis of BCS-MPI
is on the global coordination of a large number of
communicating processes rather than on the traditional
optimization of the point-to-point performance. BCS-MPI
delays the interprocessor communication in order to
schedule globally the communication pattern and it is
designed on top of a minimal set of collective
communication primitives. In this paper we describe a
prototype implementation of BCS-MPI and its
communication protocols. Several experimental results,
executed on a set of scientific applications, show that
BCS-MPI can compete with a production-level MPI
implementation, but is much simpler to implement, debug
and model.",
acknowledgement = ack-nhfb,
keywords = "buffered coscheduling; cluster computing;
communication protocols; large-scale parallel
computers; MPI; Quadrics; STORM; system software",
}
@InProceedings{Gabriel:2003:EPM,
  author = {Edgar Gabriel and Graham Fagg and Jack Dongarra},
  title = {Evaluating the Performance of {MPI-2} Dynamic
    Communicators and One-Sided Communication},
  crossref = {Dongarra:2003:RAP},
  pages = {??--??},
  year = {2003},
  bibdate = {Tue Jan 13 18:15:48 2004},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.netlib.org/netlib/utk/people/JackDongarra/PAPERS/europvm-mpi-2003-mpi2.pdf},
  acknowledgement = ack-nhfb,
}
@InProceedings{Gabriel:2003:FTC,
  author = {Edgar Gabriel and Graham E. Fagg and Antonin Bukovsky
    and Thara Angskun and Jack J. Dongarra},
  editor = {????},
  booktitle = {{17th Annual ACM International Conference on
    Supercomputing (ICS'03) International Workshop on Grid
    Computing and e-Science, June 21, 2003, San
    Francisco}},
  title = {A Fault-Tolerant Communication Library for {Grid}
    Environments},
  publisher = {????},
  address = {????},
  pages = {??--??},
  year = {2003},
  ISBN = {????},
  ISBN-13 = {????},
  LCCN = {????},
  bibdate = {Tue Jan 13 18:14:32 2004},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.netlib.org/netlib/utk/people/JackDongarra/PAPERS/FTMPI-SF-gabriel.pdf},
  acknowledgement = ack-nhfb,
  xxcrossref = {ACM:2003:CPI},
}
@Article{Gao:2003:LSP,
  author = {Shiwu Gao},
  title = {Linear-scaling parallelization of the {WIEN} package
    with {MPI}},
  journal = j-COMP-PHYS-COMM,
  volume = {153},
  number = {2},
  pages = {190--198},
  day = {15},
  month = jun,
  year = {2003},
  CODEN = {CPHCBZ},
  DOI = {https://doi.org/10.1016/S0010-4655(03)00224-8},
  ISSN = {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L = {0010-4655},
  bibdate = {Mon Feb 13 23:41:30 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0010465503002248},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-URL = {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Jin:2003:AMP,
  author = {Haoqiang Jin and Gabriele Jost and Jerry Yan and
    others},
  title = {Automatic multilevel parallelization using {OpenMP}},
  journal = j-SCI-PROG,
  volume = {11},
  number = {2},
  pages = {177--190},
  year = {2003},
  CODEN = {SCIPEV},
  ISSN = {1058-9244 (print), 1875-919X (electronic)},
  ISSN-L = {1058-9244},
  bibdate = {Mon Jan 12 06:28:15 MST 2004},
  bibsource = {http://www.iospress.nl/site/html/10589244.html;
    http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sciprogram.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Scientific Programming},
  journal-URL = {http://iospress.metapress.com/content/1058-9244},
}
@Article{Karonis:2003:MGG,
  author = {Nicholas T. Karonis and Brian Toonen and Ian Foster},
  title = {{MPICH-G2}: a {Grid}-enabled implementation of the
    {Message Passing Interface}},
  journal = j-J-PAR-DIST-COMP,
  volume = {63},
  number = {5},
  pages = {551--563},
  month = may,
  year = {2003},
  CODEN = {JPDCER},
  ISSN = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L = {0743-7315},
  bibdate = {Tue Dec 16 16:10:41 MST 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Karwande:2003:CMC,
author = "Amit Karwande and Xin Yuan and David K. Lowenthal",
title = "{CC-MPI}: a compiled communication capable {MPI}
prototype for {Ethernet} switched clusters",
journal = j-SIGPLAN,
volume = "38",
number = "10",
pages = "95--106",
month = oct,
year = "2003",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 22 16:52:42 MST 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@InProceedings{Kee:2003:POP,
  author = {Yang-Suk Kee and Jin-Soo Kim and Soonhoi Ha},
  title = {{ParADE}: An {OpenMP} Programming Environment for
    {SMP} Cluster Systems},
  crossref = {ACM:2003:SII},
  pages = {??--??},
  year = {2003},
  bibdate = {Wed Nov 26 07:34:20 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/linux.bib;
    http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/supercomputing2003.bib},
  URL = {http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10708#0;
    http://www.sc-conference.org/sc2003/paperpdfs/pap130.pdf},
  abstract = {Demand for programming environments to exploit
    clusters of symmetric multiprocessors (SMPs) is
    increasing. In this paper, we present a new programming
    environment, called ParADE, to enable easy, portable,
    and high-performance programming on SMP clusters. It is
    an OpenMP programming environment on top of a
    multi-threaded software distributed shared memory
    (SDSM) system with a variant of home-based lazy release
    consistency protocol. To boost performance, the runtime
    system provides explicit message-passing primitives to
    make it a hybrid-programming environment. Collective
    communication primitives are used for the
    synchronization and work-sharing directives associated
    with small data structures, lessening the
    synchronization overhead and avoiding the implicit
    barriers of work-sharing directives. The OpenMP
    translator bridges the gap between the OpenMP
    abstraction and the hybrid programming interfaces of
    the runtime system. The experiments with several NAS
    benchmarks and applications on a Linux-based cluster
    show promising results that ParADE overcomes the
    performance problem of the conventional SDSM-based
    OpenMP environment.},
  acknowledgement = ack-nhfb,
  keywords = {hybrid programming; MPI; OpenMP; programming
    environment; SMP cluster; software distributed shared
    memory},
}
@Article{Keller:2003:TEE,
  author = {Rainer Keller and Edgar Gabriel and Bettina Krammer
    and Matthias S. M{\"u}ller and Michael M. Resch},
  title = {Towards Efficient Execution of {MPI} Applications on
    the {Grid}: Porting and Optimization Issues},
  journal = j-J-GRID-COMP,
  volume = {1},
  number = {2},
  pages = {133--149},
  month = {????},
  year = {2003},
  CODEN = {????},
  ISSN = {1570-7873 (print), 1572-9184 (electronic)},
  ISSN-L = {1570-7873},
  bibdate = {Sat Dec 4 11:39:31 MST 2004},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.wkap.nl/jrnltoc.htm/1570-7873},
  URL = {http://ipsapp008.kluweronline.com/IPS/content/ext/x/J/6160/I/4/A/4/abstract.htm},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Grid Computing},
  journal-URL = {http://link.springer.com/journal/10723},
}
@InProceedings{Komatitsch:2003:BDF,
  author = {Dimitri Komatitsch and Seiji Tsuboi and Chen Ji and
    Jeroen Tromp},
  title = {A 14.6 billion degrees of freedom, 5 teraflops, 2.5
    terabyte earthquake simulation on the {Earth
    Simulator}},
  crossref = {ACM:2003:SII},
  pages = {??--??},
  year = {2003},
  bibdate = {Wed Nov 26 07:34:20 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10711#1;
    http://www.sc-conference.org/sc2003/paperpdfs/pap124.pdf},
  abstract = {We use 1944 processors of the Earth Simulator to model
    seismic wave propagation resulting from large
    earthquakes. Simulations are conducted based upon the
    spectral-element method, a high-degree finite-element
    technique with an exactly diagonal mass matrix. We use
    a very large mesh with 5.5 billion grid points (14.6
    billion degrees of freedom). We include the full
    complexity of the Earth, i.e., a three-dimensional
    wave-speed and density structure, a 3-D crustal model,
    ellipticity as well as topography and bathymetry. A
    total of 2.5 terabytes of memory is needed. Our
    implementation is purely based upon MPI, with loop
    vectorization on each processor. We obtain an excellent
    vectorization ratio of 99.3\%, and we reach a
    performance of 5 teraflops (30\% of the peak
    performance) on 38\% of the machine. The very high
    resolution of the mesh allows us to perform fully
    three-dimensional calculations at seismic periods as
    low as 5 seconds.},
  acknowledgement = ack-nhfb,
}
@Article{Kranzlmuller:2003:RAP,
author = "Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack
Dongarra and Jens Volkert",
title = "Recent Advances in Parallel Virtual Machine and
Message Passing Interface (Select papers from the
{EuroPVM\slash MPI 2002 Conference})",
journal = j-IJHPCA,
volume = "17",
number = "1",
pages = "3--5",
month = "Spring",
year = "2003",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Nov 28 06:52:13 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "The International Journal of High Performance
Computing Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@InProceedings{Li:2003:PNH,
  author = {Jianwei Li and Wei-keng Liao and Alok Choudhary and
    Robert Ross and Rajeev Thakur and William Gropp and Rob
    Latham and Andrew Siegel and Brad Gallagher and Michael
    Zingale},
  title = {{Parallel netCDF}: a High-Performance Scientific {I/O}
    Interface},
  crossref = {ACM:2003:SII},
  pages = {??--??},
  year = {2003},
  bibdate = {Wed Nov 26 07:34:20 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10722#1;
    http://www.sc-conference.org/sc2003/paperpdfs/pap258.pdf},
  abstract = {Dataset storage, exchange, and access play a critical
    role in scientific applications. For such purposes
    netCDF serves as a portable, efficient file format and
    programming interface, which is popular in numerous
    scientific application domains. However, the original
    interface does not provide an efficient mechanism for
    parallel data storage and access. In this work, we
    present a new parallel interface for writing and
    reading netCDF datasets. This interface is derived with
    minimal changes from the serial netCDF interface but
    defines semantics for parallel access and is tailored
    for high performance. The underlying parallel I/O is
    achieved through MPI-IO, allowing for substantial
    performance gains through the use of collective I/O
    optimizations. We compare the implementation strategies
    and performance with HDF5. Our tests indicate
    programming convenience and significant I/O performance
    improvement with this parallel netCDF (PnetCDF)
    interface.},
  acknowledgement = ack-nhfb,
}
@InProceedings{Liu:2003:PCM,
  author = {Jiuxing Liu and Balasubramanian Chandrasekaran and
    Jiesheng Wu and Weihang Jiang and Sushmitha Kini and
    Weikuan Yu and Darius Buntinas and Pete Wyckoff and D.
    K. Panda},
  title = {Performance Comparison of {MPI} Implementations over
    {InfiniBand}, {Myrinet} and {Quadrics}},
  crossref = {ACM:2003:SII},
  pages = {??--??},
  year = {2003},
  bibdate = {Wed Nov 26 07:34:20 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10696#0;
    http://www.sc-conference.org/sc2003/paperpdfs/pap310.pdf},
  abstract = {In this paper, we present a comprehensive performance
    comparison of MPI implementations over InfiniBand,
    Myrinet and Quadrics. Our performance evaluation
    consists of two major parts. The first part consists of
    a set of MPI level micro-benchmarks that characterize
    different aspects of MPI implementations. The second
    part of the performance evaluation consists of
    application level benchmarks. We have used the NAS
    Parallel Benchmarks and the sweep3D benchmark. We not
    only present the overall performance results, but also
    relate application communication characteristics to the
    information we acquired from the micro-benchmarks. Our
    results show that the three MPI implementations all
    have their advantages and disadvantages. For our 8-node
    cluster, InfiniBand can offer significant performance
    improvements for a number of applications compared with
    Myrinet and Quadrics when using the PCI-X bus. Even
    with just the PCI bus, InfiniBand can still perform
    better if the applications are bandwidth-bound.},
  acknowledgement = ack-nhfb,
}
@Article{Luecke:2003:CPM,
  author = {Glenn R. Luecke and Marina Kraeva and Lili Ju},
  title = {Comparing the performance of {MPICH} with {Cray}'s
    {MPI} and with {SGI}'s {MPI}},
  journal = j-CCPE,
  volume = {15},
  number = {9},
  pages = {779--802},
  day = {10},
  month = aug,
  year = {2003},
  CODEN = {CCPEBO},
  DOI = {https://doi.org/10.1002/cpe.719},
  ISSN = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L = {1532-0626},
  bibdate = {Tue Jan 13 09:28:12 MST 2004},
  bibsource = {http://www.interscience.wiley.com/jpages/1532-0626;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www3.interscience.wiley.com/journalfinder.html},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Prac\-tice and
    Experience},
  journal-URL = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate = {14 Jul 2003},
}
@Article{Luecke:2003:MCT,
  author = {Glenn Luecke and Hua Chen and James Coyle and Jim
    Hoekstra and Marina Kraeva and Yan Zou},
  title = {{MPI-CHECK}: a tool for checking {Fortran 90 MPI}
    programs},
  journal = j-CCPE,
  volume = {15},
  number = {2},
  pages = {93--100},
  month = feb,
  year = {2003},
  CODEN = {CCPEBO},
  DOI = {https://doi.org/10.1002/cpe.705},
  ISSN = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L = {1532-0626},
  bibdate = {Tue Jan 13 09:28:06 MST 2004},
  bibsource = {http://www.interscience.wiley.com/jpages/1532-0626;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www3.interscience.wiley.com/journalfinder.html},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Prac\-tice and
    Experience},
  journal-URL = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate = {6 Jan 2003},
}
@Article{Marowka:2003:EOT,
author = "Ami Marowka",
title = "Extending {OpenMP} for Task Parallelism",
journal = j-PARALLEL-PROCESS-LETT,
volume = "13",
number = "3",
pages = "341--??",
month = sep,
year = "2003",
CODEN = "PPLTEE",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
ISSN-L = "0129-6264",
bibdate = "Sat Nov 6 18:06:31 MST 2004",
bibsource = "http://ejournals.wspc.com.sg/ppl/;
http://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@Article{Mattson:2003:HGO,
  author = {Timothy G. Mattson},
  title = {How good is {OpenMP}?},
  journal = j-SCI-PROG,
  volume = {11},
  number = {2},
  pages = {81--93},
  year = {2003},
  CODEN = {SCIPEV},
  ISSN = {1058-9244 (print), 1875-919X (electronic)},
  ISSN-L = {1058-9244},
  bibdate = {Mon Jan 12 06:28:15 MST 2004},
  bibsource = {http://www.iospress.nl/site/html/10589244.html;
    http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sciprogram.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Scientific Programming},
  journal-URL = {http://iospress.metapress.com/content/1058-9244},
}
@Article{Michailidis:2003:PEL,
  author = {Panagiotis D. Michailidis and Konstantinos G.
    Margaritis},
  title = {Performance evaluation of load balancing strategies
    for approximate string matching application on an {MPI}
    cluster of heterogeneous workstations},
  journal = j-FUT-GEN-COMP-SYS,
  volume = {19},
  number = {7},
  pages = {1075--1104},
  month = oct,
  year = {2003},
  CODEN = {FGSEVI},
  ISSN = {0167-739X (print), 1872-7115 (electronic)},
  ISSN-L = {0167-739X},
  bibdate = {Sat Jan 10 10:03:37 MST 2004},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Future Generation Computer Systems},
  journal-URL = {http://www.sciencedirect.com/science/journal/0167739X},
  remark = {Selected papers on Theoretical and Computational
    Aspects of Structural Dynamical Systems in Linear
    Algebra and Control.},
}
@Article{Min:2003:OOP,
author = "Seung-Jai Min and Ayon Basumallik and Rudolf
Eigenmann",
title = "Optimizing {OpenMP} Programs on Software Distributed
Shared Memory Systems",
journal = j-INT-J-PARALLEL-PROG,
volume = "31",
number = "3",
pages = "225--249",
month = jun,
year = "2003",
CODEN = "IJPPE5",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Sat Jan 24 14:51:21 MST 2004",
bibsource = "http://www.kluweronline.com/issn/0885-7458;
http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://ipsapp007.kluweronline.com/content/getfile/4773/33/5/abstract.htm;
http://ipsapp007.kluweronline.com/content/getfile/4773/33/5/fulltext.pdf",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
xxURL = "/ips/frames/Refs/referenceskapmain.asp?J=4773&I=33&A=5&LK=NM",
}
@InProceedings{Moody:2003:SNB,
  author = {Adam Moody and Juan Fernandez and Fabrizio Petrini and
    Dhabaleswar K. Panda},
  title = {Scalable {NIC}-based Reduction on Large-Scale
    Clusters},
  crossref = {ACM:2003:SII},
  pages = {??--??},
  year = {2003},
  bibdate = {Wed Nov 26 07:34:20 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10716#2;
    http://www.sc-conference.org/sc2003/paperpdfs/pap316.pdf},
  abstract = {Many parallel algorithms require efficient reduction
    collectives. In response, researchers have designed
    algorithms considering a range of parameters including
    data size, system size, and communication
    characteristics. Throughout this past work, however,
    processing was limited to the host CPU. Today, modern
    Network Interface Cards (NICs) sport programmable
    processors with substantial memory, and thus introduce
    a fresh variable into the equation. In this paper, we
    investigate this new option in the context of
    large-scale clusters. Through experiments on the
    960-node, 1920-processor ASCI Linux Cluster (ALC) at
    Lawrence Livermore National Laboratory, we show that
    NIC-based reductions outperform host-based algorithms
    in terms of reduced latency and increased consistency.
    In particular, in the largest configuration tested ---
    1812 processors --- our NIC-based algorithm summed
    single-element vectors of 32-bit integers and 64-bit
    floating-point numbers in 73 $ \mu $ s and 118 $ \mu $
    s, respectively. These results represent respective
    improvements of 121\% and 39\% over the
    production-level MPI library.},
  acknowledgement = ack-nhfb,
}
@Article{Muller:2003:OCB,
author = "Matthias S. M{\"u}ller",
title = "An {OpenMP} compiler benchmark",
journal = j-SCI-PROG,
volume = "11",
number = "2",
pages = "125--131",
year = "2003",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Mon Jan 12 06:28:15 MST 2004",
bibsource = "http://www.iospress.nl/site/html/10589244.html;
http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@InProceedings{Nakajima:2003:PIS,
author = "Kengo Nakajima",
title = "Parallel Iterative Solvers of {GeoFEM} with Selective
Blocking Preconditioning for Nonlinear Contact Problems
on the {Earth Simulator}",
crossref = "ACM:2003:SII",
pages = "??--??",
year = "2003",
bibdate = "Wed Nov 26 07:34:20 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2003.bib",
URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10703#1;
http://www.sc-conference.org/sc2003/paperpdfs/pap155.pdf",
abstract = "An efficient parallel iterative method with selective
blocking preconditioning has been developed for
symmetric multiprocessor (SMP) cluster architectures
with vector processors such as the Earth Simulator.
This method is based on a three-level hybrid parallel
programming model, which includes message passing for
inter-SMP node communication, loop directives by OpenMP
for intra-SMP node parallelization and vectorization
for each processing element (PE). This method provides
robust and smooth convergence and excellent vector and
parallel performance in 3D geophysical simulations with
contact conditions performed on the Earth Simulator.
The selective blocking preconditioning is much more
efficient than ILU(1) and ILU(2). Performance for the
complicated Southwest Japan model with more than 23 M
DOF on 10 SMP nodes (80 PEs) of the Earth Simulator was
161.7 GFLOPS, corresponding to 25.3\% of the peak
performance for hybrid programming model, and 190.4
GFLOPS (29.8\% of the peak performance) for flat MPI,
respectively.",
acknowledgement = ack-nhfb,
}
@Article{Nakano:2003:SCG,
author = "Hirofumi Nakano and Kazuhisa Ishizaka and Motoki Obata
and Keiji Kimura and Hironori Kasahara",
title = "Static Coarse Grain Task Scheduling with Cache
Optimization Using {OpenMP}",
journal = j-INT-J-PARALLEL-PROG,
volume = "31",
number = "3",
pages = "211--223",
month = jun,
year = "2003",
CODEN = "IJPPE5",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Sat Jan 24 14:51:21 MST 2004",
bibsource = "http://www.kluweronline.com/issn/0885-7458;
http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "/ips/frames/Refs/referenceskapmain.asp?J=4773&I=33&A=4&LK=NM;
http://ipsapp007.kluweronline.com/content/getfile/4773/33/4/abstract.htm;
http://ipsapp007.kluweronline.com/content/getfile/4773/33/4/fulltext.pdf",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@Book{Quinn:2003:PPC,
author = "Michael J. (Michael Jay) Quinn",
title = "Parallel programming in {C} with {MPI} and {OpenMP}",
publisher = pub-MCGRAW-HILL,
address = pub-MCGRAW-HILL:adr,
pages = "xiv + 529",
year = "2003",
ISBN = "0-07-123265-6, 0-07-282256-2",
ISBN-13 = "978-0-07-123265-4, 978-0-07-282256-4",
LCCN = "QA76.73.C15 Q55 2003; QA76.73 .C15 Q55 2003",
bibdate = "Thu Jun 2 07:26:02 MDT 2005",
bibsource = "clavis.ucalgary.ca:2200/UNICORN;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "The era of practical parallel programming has arrived,
marked by the popularity of the MPI and OpenMP software
standards and the emergence of commodity clusters as
the hardware platform of choice for an increasing
number of organizations. This exciting new book,
``Parallel Programming in C with MPI and OpenMP''
addresses the needs of students and professionals who
want to learn how to design, analyze, implement, and
benchmark parallel programs in C using MPI and/or
OpenMP. It introduces a rock-solid design methodology
with coverage of the most important MPI functions and
OpenMP directives. It also demonstrates, through a wide
range of examples, how to develop parallel programs
that will execute efficiently on today's parallel
platforms.",
acknowledgement = ack-nhfb,
subject = "C (Computer program language); Parallel programming
(Computer science)",
tableofcontents = "Motivation and history \\
Parallel architectures \\
Parallel algorithm design \\
Message-passing programming \\
The sieve of Eratosthenes \\
Floyd's algorithm \\
Performance analysis \\
Matrix--vector multiplication \\
Document classification \\
Monte Carlo methods \\
Matrix multiplication \\
Solving linear systems \\
Finite difference methods \\
Sorting \\
The Fast Fourier Transform \\
Combinatorial search \\
Shared-memory programming \\
Combining MPI and OpenMP",
}
@Article{Reussner:2003:USD,
author = "Ralf H. Reussner",
title = "Using {SKaMPI} for developing high-performance {MPI}
programs with performance portability",
journal = j-FUT-GEN-COMP-SYS,
volume = "19",
number = "5",
pages = "749--759",
month = jul,
year = "2003",
CODEN = "FGSEVI",
ISSN = "0167-739X (print), 1872-7115 (electronic)",
ISSN-L = "0167-739X",
bibdate = "Sat Jan 10 10:03:34 MST 2004",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Future Generation Computer Systems",
journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
remark = "Tools for Program Development and Analysis. Best
papers from two Technical Sessions, at ICCS2001, San
Francisco, CA, USA, and ICCS2002, Amsterdam, The
Netherlands.",
}
@Article{Saito:2003:LSP,
author = "Hideki Saito and Greg Gaertner and Wesley Jones and
Rudolf Eigenmann and Hidetoshi Iwashita and Ron
Lieberman and Matthijs van Waveren and Brian Whitney",
title = "Large System Performance of {SPEC OMP} Benchmark
Suites",
journal = j-INT-J-PARALLEL-PROG,
volume = "31",
number = "3",
pages = "197--209",
month = jun,
year = "2003",
CODEN = "IJPPE5",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Sat Jan 24 14:51:21 MST 2004",
bibsource = "http://www.kluweronline.com/issn/0885-7458;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "/ips/frames/Refs/referenceskapmain.asp?J=4773&I=33&A=3&LK=NM;
http://ipsapp007.kluweronline.com/content/getfile/4773/33/3/abstract.htm;
http://ipsapp007.kluweronline.com/content/getfile/4773/33/3/fulltext.pdf",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Shires:2003:OPF,
author = "Dale Shires and Ram Mohan",
title = "Optimization and Performance of a {Fortran 90}
{MPI}-Based Unstructured Code on Large-Scale Parallel
Systems",
journal = j-J-SUPERCOMPUTING,
volume = "25",
number = "2",
pages = "131--141",
month = jun,
year = "2003",
CODEN = "JOSUED",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Tue Dec 16 08:27:09 MST 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.wkap.nl/journalhome.htm/0920-8542",
URL = "http://ipsapp009.kluweronline.com/content/getfile/5189/44/4/abstract.htm;
http://ipsapp009.kluweronline.com/content/getfile/5189/44/4/fulltext.pdf",
abstract = "The message-passing interface (MPI) has become the
standard in achieving effective results when using the
message passing paradigm of parallelization. Codes
written using MPI are extremely portable and are
applicable to both clusters and massively parallel
computing platforms. Since MPI uses the single program,
multiple data (SPMD) approach to parallelism, good
performance requires careful tuning of the serial code
as well as careful data and control flow analysis to
limit communication. We discuss optimization strategies
used and their degree of success to increase
performance of an MPI-based unstructured finite element
simulation code written in Fortran 90. We discuss
performance results based on implementations using
several modern massively parallel computing platforms
including the SGI Origin 3800, IBM Nighthawk 2 SMP, and
Cray T3E-1200.",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Squyres:2003:CAL,
author = "Jeffrey M. Squyres",
title = "A component architecture for {LAM\slash MPI} (citation
only)",
journal = j-SIGPLAN,
pages = "??--??",
year = "2003",
CODEN = "SINODQ",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Dec 22 16:52:42 MST 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Takahashi:2003:PEH,
author = "Daisuke Takahashi and Mitsuhisa Sato and Taisuke
Boku",
title = "Performance Evaluation of the {Hitachi SR8000} Using
{SPEC OMP2001} Benchmarks",
journal = j-INT-J-PARALLEL-PROG,
volume = "31",
number = "3",
pages = "185--196",
month = jun,
year = "2003",
CODEN = "IJPPE5",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Sat Jan 24 14:51:21 MST 2004",
bibsource = "http://www.kluweronline.com/issn/0885-7458;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "/ips/frames/Refs/referenceskapmain.asp?J=4773&I=33&A=2&LK=NM;
http://ipsapp007.kluweronline.com/content/getfile/4773/33/2/abstract.htm;
http://ipsapp007.kluweronline.com/content/getfile/4773/33/2/fulltext.pdf",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@InProceedings{Weatherly:2003:DMS,
author = "D. Brent Weatherly and David K. Lowenthal and Mario
Nakazawa and Franklin Lowenthal",
title = "{Dyn-MPI}: Supporting {MPI} on Non Dedicated
Clusters",
crossref = "ACM:2003:SII",
pages = "??--??",
year = "2003",
bibdate = "Wed Nov 26 07:34:20 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10708#1;
http://www.sc-conference.org/sc2003/paperpdfs/pap126.pdf",
abstract = "Distributing data is a fundamental problem in
implementing efficient distributed-memory parallel
programs. The problem becomes more difficult in
environments where the participating nodes are not
dedicated to a parallel application. We are
investigating the data distribution problem in non
dedicated environments in the context of explicit
message-passing programs.\par
To address this problem, we have designed and
implemented an extension to MPI called Dynamic MPI
(Dyn-MPI). The key component of Dyn-MPI is its run-time
system, which efficiently and automatically
redistributes data on the fly when there are changes in
the application or the underlying environment. Dyn-MPI
supports efficient memory allocation, precise
measurement of system load and computation time, and
node removal. Performance results show that programs
that use Dyn-MPI execute efficiently in non dedicated
environments, including up to almost a three-fold
improvement compared to programs that do not
redistribute data and a 25\% improvement over standard
adaptive load balancing techniques.",
acknowledgement = ack-nhfb,
}
@InProceedings{Worringen:2003:FPN,
author = "Joachim Worringen and Jesper Larsson Tr{\"a}ff and Hubert
Ritzdorf",
title = "Fast Parallel Non-Contiguous File Access",
crossref = "ACM:2003:SII",
pages = "??--??",
year = "2003",
bibdate = "Wed Nov 26 07:34:20 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10722#0;
http://www.sc-conference.org/sc2003/paperpdfs/pap319.pdf",
abstract = "Many applications of parallel I/O perform
non-contiguous file accesses: instead of accessing a
single (large) block of data in a file, a number of
(smaller) blocks of data scattered throughout the file
needs to be accessed in each logical I/O operation.
However, only few file system interfaces directly
support this kind of non-contiguous file access. In
contrast, the most commonly used parallel programming
interface, MPI, incorporates a flexible model of
parallel I/O through its MPI-IO interface. With MPI-IO,
arbitrary non-contiguous file accesses are supported in
a uniform fashion by the use of derived MPI datatypes
set up by the user to reflect the desired I/O
pattern.\par
Despite a considerable amount of recent work in this
area, current MPI-IO implementations suffer from low
performance of such non-contiguous accesses when
compared to the performance of the storage system for
contiguous accesses. In this paper we analyze an
important bottleneck in the efficient handling of
non-contiguous access patterns in current
implementations of MPI-IO. We present a new technique,
termed listless I/O, that can be incorporated into
MPI-IO implementations like the well-known ROMIO
implementation, and completely eliminates this
bottleneck. We have implemented the technique in
MPI/SX, the MPI implementation for the NEC SX-series of
parallel vector computers. Results with a synthetic
benchmark and an application kernel show that listless
I/O is able to increase the bandwidth for
non-contiguous file access by sometimes more than a
factor of 500 when compared to the traditional
approach.",
acknowledgement = ack-nhfb,
}
@InProceedings{Ying:2003:NPK,
author = "Lexing Ying and George Biros and Denis Zorin and
Harper Langston",
title = "A new parallel kernel-independent fast multipole
method",
crossref = "ACM:2003:SII",
pages = "??--??",
year = "2003",
bibdate = "Wed Nov 26 07:34:20 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sc-conference.org/sc2003/inter_cal/inter_cal_detail.php?eventid=10707#2;
http://www.sc-conference.org/sc2003/paperpdfs/pap166.pdf",
abstract = "We present a new adaptive fast multipole algorithm and
its parallel implementation. The algorithm is
kernel-independent in the sense that the evaluation of
pairwise interactions does not rely on any analytic
expansions, but only utilizes kernel evaluations. The
new method provides the enabling technology for many
important problems in computational science and
engineering. Examples include viscous flows, fracture
mechanics and screened Coulombic interactions. Our
MPI-based parallel implementation logically separates
the computation and communication phases to avoid
synchronization in the upward and downward computation
passes, and thus allows us to fully exploit computation
and communication overlapping. We measure isogranular
and fixed-size scalability for a variety of kernels on
the Pittsburgh Supercomputing Center's TCS-1
AlphaServer on up to 3000 processors. We have solved
viscous flow problems with up to 2.1 billion unknowns
and we have achieved 1.6 Tflops/s peak performance and
1.13 Tflops/s sustained performance.",
acknowledgement = ack-nhfb,
keywords = "adaptive algorithms; boundary integral equations; Fast
multipole methods; massively parallel computing; N-body
problems; viscous flows",
}
@Book{Bisseling:2004:PSC,
author = "Rob H. Bisseling",
title = "Parallel scientific computation: a structured approach
using {BSP} and {MPI}",
publisher = pub-OXFORD,
address = pub-OXFORD:adr,
pages = "xviii + 305",
year = "2004",
ISBN = "0-19-852939-2",
ISBN-13 = "978-0-19-852939-2",
LCCN = "QA76.58 .B57 2004",
bibdate = "Tue Mar 13 14:00:12 MDT 2007",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
z3950.loc.gov:7090/Voyager",
URL = "http://www.loc.gov/catdir/enhancements/fy0617/2004046141-d.html;
http://www.loc.gov/catdir/enhancements/fy0617/2004046141-t.html",
acknowledgement = ack-nhfb,
subject = "Bulk Synchronous Parallel (BSP) model; Message Passing
Interface (MPI); Parallel processing (Electronic
computers); Scientific applications; Supercomputers;
Parallel computers",
}
@Article{Boeres:2004:ETF,
author = "Cristina Boeres and Vinod E. F. Rebello",
title = "{EasyGrid}: towards a framework for the automatic
{Grid} enabling of legacy {MPI} applications",
journal = j-CCPE,
volume = "16",
number = "5",
pages = "425--432",
day = "25",
month = apr,
year = "2004",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.821",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Sat May 14 11:30:53 MDT 2005",
bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Prac\-tice and
Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "26 Mar 2004",
}
@Article{Corbalan:2004:PMD,
author = "Julita Corbalan and Xavier Martorell and Jesus
Labarta",
title = "Page Migration with Dynamic Space-Sharing Scheduling
Policies: The Case of the {SGI O2000}",
journal = j-INT-J-PARALLEL-PROG,
volume = "32",
number = "4",
pages = "263--288",
month = aug,
year = "2004",
CODEN = "IJPPE5",
DOI = "https://doi.org/10.1023/B:IJPP.0000035815.13969.ec",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Wed Jul 9 16:05:14 MDT 2008",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=32&issue=4;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=32&issue=4&spage=263",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
keywords = "CC-NUMA; dynamic processor allocation policy; memory
page migration; multiprogrammed workload; OpenMP",
}
@Article{Cotronis:2004:CMP,
author = "Yiannis Cotronis",
title = "Composition of {Message Passing Interface}
Applications over {MPICH-G2}",
journal = j-IJHPCA,
volume = "18",
number = "3",
pages = "327--339",
month = "Fall",
year = "2004",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342004046047",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/18/3.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/18/3/327.full.pdf+html",
acknowledgement = ack-nhfb,
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Fagg:2004:BUF,
author = "Graham E. Fagg and Jack J. Dongarra",
title = "Building and Using a Fault-Tolerant {MPI}
Implementation",
journal = j-IJHPCA,
volume = "18",
number = "3",
pages = "353--361",
month = "Fall",
year = "2004",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342004046052",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/18/3.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/18/3/353.full.pdf+html",
acknowledgement = ack-nhfb,
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Book{Fernando:2004:GGP,
editor = "Randima Fernando",
title = "{GPU} gems: programming techniques, tips, and tricks
for real-time graphics",
volume = "1",
publisher = pub-AW,
address = pub-AW:adr,
pages = "xlv + 765",
year = "2004",
ISBN = "0-321-22832-4",
ISBN-13 = "978-0-321-22832-1",
LCCN = "T385 .G6879 2004",
bibdate = "Thu Jul 29 13:36:54 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/unix.bib;
z3950.loc.gov:7090/Voyager",
price = "US\$45.99",
series = "GPU gems",
acknowledgement = ack-nhfb,
keywords = "CUDA; nVIDIA",
subject = "Computer graphics; Real-time programming",
}
@Article{Gropp:2004:FTM,
author = "William Gropp and Ewing Lusk",
title = "Fault Tolerance in {Message Passing Interface}
Programs",
journal = j-IJHPCA,
volume = "18",
number = "3",
pages = "363--372",
month = "Fall",
year = "2004",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342004046045",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/18/3.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/18/3/363.full.pdf+html",
acknowledgement = ack-nhfb,
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Iwasaki:2004:NPS,
author = "Hideya Iwasaki and Zhenjiang Hu",
title = "A New Parallel Skeleton for General Accumulative
Computations",
journal = j-INT-J-PARALLEL-PROG,
volume = "32",
number = "5",
pages = "389--414",
month = oct,
year = "2004",
CODEN = "IJPPE5",
DOI = "https://doi.org/10.1023/B:IJPP.0000038069.80050.74",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Wed Jul 9 16:05:18 MDT 2008",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=32&issue=5;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=32&issue=5&spage=389",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
keywords = "Bird data parallel skeleton; Meertens formalism; MPI;
program transformation; Skeletal parallel programming",
}
@InProceedings{Ke:2004:RCM,
author = "Jian Ke and Martin Burtscher and Evan Speight",
title = "Runtime Compression of {MPI} Messages to Improve the
Performance and Scalability of Parallel Applications",
crossref = "ACM:2004:SHP",
pages = "59--59",
year = "2004",
bibdate = "Tue Dec 27 07:57:20 MST 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Kepner:2004:M,
author = "Jeremy Kepner and Stan Ahalt",
title = "{MatlabMPI}",
journal = j-J-PAR-DIST-COMP,
volume = "64",
number = "8",
pages = "997--1005",
month = aug,
year = "2004",
CODEN = "JPDCER",
DOI = "https://doi.org/10.1016/j.jpdc.2004.03.018",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Sat Dec 4 15:15:10 MST 2004",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/07437315",
abstract = "In many projects the true costs of high performance
computing are currently dominated by software.
Addressing these costs may require shifting to higher
level languages such as Matlab. MatlabMPI is a Matlab
implementation of the Message Passing Interface (MPI)
standard and allows any Matlab program to exploit
multiple processors. MatlabMPI currently implements the
basic six functions that are the core of the MPI
point-to-point communications standard. The key
technical innovation of MatlabMPI is that it implements
the widely used MPI ``look and feel'' on top of
standard Matlab file I/O, resulting in an extremely
compact ($ \approx 350 $ lines of code) and ``pure''
implementation which runs anywhere Matlab runs, and on
any heterogeneous combination of computers. The
performance has been tested on both shared and
distributed memory parallel computers (e.g. Sun, SGI,
HP, IBM, Linux, MacOSX and Windows). MatlabMPI can
match the bandwidth of C based MPI at large message
sizes. A test image filtering application using
MatlabMPI achieved a speedup of $ \approx 300 $ using
304 CPUs and $ \approx 15 \% $ of the theoretical peak
(450 Gigaflops) on an IBM SP2 at the Maui High
Performance Computing Center. In addition, this entire
parallel benchmark application was implemented in 70
software-lines-of-code, illustrating the high
productivity of this approach. MatlabMPI is available
for download on the web (www.ll.mit.edu/MatlabMPI).",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Book{Ladd:2004:GPP,
author = "Scott Ladd",
title = "Guide to Parallel Programming",
publisher = pub-SV,
address = pub-SV:adr,
pages = "465 (est.)",
year = "2004",
ISBN = "0-387-40577-1",
ISBN-13 = "978-0-387-40577-3",
LCCN = "????",
bibdate = "Wed Aug 27 06:31:34 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Includes CD-ROM.",
acknowledgement = ack-nhfb,
tableofcontents = "Introduction; Supercomputing; Tools for Parallel
Programming; Introducing OpenMP; Parallel Loops with
OpenMP; Advanced OpenMP; Message passing with MPI;
Deeper MPI; Design of data and algorithms;
Optimization; Debugging the hydra; Parallel in
parallel--MPI and OpenMP together; Elaborations;
Resources; Index",
}
@InProceedings{Liu:2004:BMI,
author = "Jiuxing Liu and Abhinav Vishnu and Dhabaleswar K.
Panda",
title = "Building Multirail {InfiniBand} Clusters: {MPI}-Level
Design and Performance Evaluation",
crossref = "ACM:2004:SHP",
pages = "33--33",
year = "2004",
bibdate = "Tue Dec 27 07:57:20 MST 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Liu:2004:HPR,
author = "Jiuxing Liu and Jiesheng Wu and Dhabaleswar K. Panda",
title = "High Performance {RDMA}-Based {MPI} Implementation
over {InfiniBand}",
journal = j-INT-J-PARALLEL-PROG,
volume = "32",
number = "3",
pages = "167--198",
month = jun,
year = "2004",
CODEN = "IJPPE5",
DOI = "https://doi.org/10.1023/B:IJPP.0000029272.69895.c1",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Wed Jul 6 16:40:03 MDT 2005",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=32&issue=3;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=32&issue=3&spage=167",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@InProceedings{Lu:2004:AFS,
author = "Charng-da Lu and Daniel A. Reed",
title = "Assessing Fault Sensitivity in {MPI} Applications",
crossref = "ACM:2004:SHP",
pages = "37--37",
year = "2004",
bibdate = "Tue Dec 27 07:57:20 MST 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Luecke:2004:PSM,
author = "Glenn R. Luecke and Marina Kraeva and Jing Yuan and
Silvia Spanoyannis",
title = "Performance and scalability of {MPI} on {PC}
clusters",
journal = j-CCPE,
volume = "16",
number = "1",
pages = "79--107",
month = jan,
year = "2004",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.749",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Tue Jan 13 09:28:19 MST 2004",
bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Prac\-tice and
Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "4 Dec 2003",
}
@Article{Luecke:2004:PSS,
author = "Glenn R. Luecke and Silvia Spanoyannis and Marina
Kraeva",
title = "The performance and scalability of {SHMEM} and {MPI-2}
one-sided routines on a {SGI Origin 2000} and a {Cray
T3E-600}",
journal = j-CCPE,
volume = "16",
number = "10",
pages = "1037--1060",
day = "25",
month = aug,
year = "2004",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.796",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Sat May 14 11:30:55 MDT 2005",
bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Prac\-tice and
Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "10 Jun 2004",
}
@Article{Marowka:2004:OOA,
author = "Ami Marowka and Zhenying Liu and Barbara Chapman",
title = "{OpenMP-oriented} applications for distributed shared
memory architectures",
journal = j-CCPE,
volume = "16",
number = "4",
pages = "371--384",
day = "10",
month = apr,
year = "2004",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.752",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Sat May 14 11:30:53 MDT 2005",
bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Prac\-tice and
Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "19 Jan 2004",
}
@Article{Martin:2004:HPA,
author = "Mar{\'\i}a J. Mart{\'\i}n and Marta Parada and
Ram{\'o}n Doallo",
title = "High Performance Air Pollution Simulation Using
{OpenMP}",
journal = j-J-SUPERCOMPUTING,
volume = "28",
number = "3",
pages = "311--321",
month = jun,
year = "2004",
CODEN = "JOSUED",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Sat Dec 4 12:39:13 MST 2004",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.wkap.nl/journalhome.htm/0920-8542",
URL = "http://ipsapp008.kluweronline.com/IPS/content/ext/x/J/5189/I/54/A/5/abstract.htm",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Book{Mertens:2004:CCP,
author = "Stephan Mertens and Alexander Schinner",
title = "{Cluster Computing: Praktische Einf{\"u}hrung in das
wissenschaftliche Rechnen auf Workstation-Clustern}",
publisher = pub-SV,
address = pub-SV:adr,
pages = "300 (est.)",
year = "2004",
ISBN = "3-540-42299-4",
ISBN-13 = "978-3-540-42299-0",
LCCN = "????",
bibdate = "Wed Aug 27 06:33:33 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Includes CD-ROM.",
acknowledgement = ack-nhfb,
}
@InProceedings{Mohror:2004:PTS,
author = "Kathryn Mohror and Karen L. Karavanic",
title = "Performance Tool Support for {MPI-2} on {Linux}",
crossref = "ACM:2004:SHP",
pages = "28--28",
year = "2004",
bibdate = "Tue Dec 27 07:57:20 MST 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Park:2004:DID,
author = "K.-L. Park and H.-J. Lee and O.-Y. Kwon and S.-Y. Park
and H.-W. Park and S.-D. Kim",
title = "Design and Implementation of a Dynamic Communication
{MPI} Library for the Grid",
journal = j-INT-J-COMPUT-APPL,
volume = "26",
number = "3",
pages = "1--8",
year = "2004",
DOI = "https://doi.org/10.1080/1206212X.2004.11441738",
ISSN = "1206-212X (print), 1925-7074 (electronic)",
ISSN-L = "1206-212X",
bibdate = "Sat Apr 21 17:21:44 MDT 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ijca.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.tandfonline.com/doi/full/10.1080/1206212X.2004.11441738",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Computer Applications",
journal-URL = "https://www.tandfonline.com/loi/tjca20",
online-date = "11 Jul 2015",
}
@InProceedings{Schulz:2004:IES,
author = "Martin Schulz and Greg Bronevetsky and Rohit Fernandes
and Daniel Marques and Keshav Pingali and Paul
Stodghill",
title = "Implementation and Evaluation of a Scalable
Application-Level Checkpoint-Recovery Scheme for {MPI}
Programs",
crossref = "ACM:2004:SHP",
pages = "38--38",
year = "2004",
bibdate = "Tue Dec 27 07:57:20 MST 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Sievert:2004:SMP,
author = "Otto Sievert and Henri Casanova",
title = "A Simple {MPI} Process Swapping Architecture for
Iterative Applications",
journal = j-IJHPCA,
volume = "18",
number = "3",
pages = "341--352",
month = "Fall",
year = "2004",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342004047430",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/18/3.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/18/3/341.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Skjellum:2004:RTM,
author = "Anthony Skjellum and Arkady Kanevsky and Yoginder S.
Dandass and Jerrell Watts and Steve Paavola and Dennis
Cottel and Greg Henley and L. Shane Hebert and Zhenqian
Cui and Anna Rounbehler and {The Real-Time Message
Passing Interface (MPI\slash RT) Forum}",
title = "The {Real-Time Message Passing Interface Standard
(MPI\slash RT-1.1)}",
journal = j-CCPE,
volume = "16",
number = "S1",
pages = "Si--S322",
day = "25",
month = dec,
year = "2004",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.744",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Sat May 14 11:30:56 MDT 2005",
bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Prac\-tice and
Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "22 Nov 2004",
}
@Article{Smith:2004:SIP,
author = "Kevin B. Smith and Aart J. C. Bik and Xinmin Tian",
title = "Support for the {Intel{\reg} Pentium{\reg} 4}
Processor with Hyper-Threading Technology in
{Intel{\reg}} 8.0 Compilers",
journal = j-INTEL-TECH-J,
volume = "8",
number = "1",
pages = "19--31",
month = feb,
year = "2004",
ISSN = "1535-766X",
bibdate = "Mon Jul 11 08:46:53 2005",
bibsource = "http://developer.intel.com/technology/itj/archive/2004.htm;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://developer.intel.com/technology/itj/2004/volume08issue01/art02_compilers/p01_abstract.htm",
acknowledgement = ack-nhfb,
keywords = "Compilers; Hyper-Threading Technology; Intel Pentium 4
processor; OpenMP; Optimization; Vectorization",
}
@Article{Vrenios:2004:PPC,
author = "A. Vrenios",
title = "{Parallel Programming in C with MPI and OpenMP} [Book
Review]",
journal = j-IEEE-DISTRIB-SYST-ONLINE,
volume = "5",
number = "1",
pages = "7.1--7.3",
month = "????",
year = "2004",
CODEN = "????",
ISSN = "1541-4922 (print), 1558-1683 (electronic)",
ISSN-L = "1541-4922",
bibdate = "Fri Jul 15 17:50:13 MDT 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://ieeexplore.ieee.org/iel5/8968/28452/01270716.pdf?isnumber=28452&prod=JNL&arnumber=1270716&arSt=+7.1&ared=+7.3&arAuthor=Vrenios%2C+A.;
http://ieeexplore.ieee.org/xpls/abs_all.jsp?isnumber=28452&arnumber=1270716&count=8&index=5",
acknowledgement = ack-nhfb,
fjournal = "IEEE Distributed Systems Online",
}
@Book{White:2004:CMM,
author = "R. E. (Robert E.) White",
title = "Computational Mathematics: Models, Methods, and
Analysis with {MATLAB} and {MPI}",
publisher = pub-CHAPMAN-HALL-CRC,
address = pub-CHAPMAN-HALL-CRC:adr,
pages = "xvi + 385",
year = "2004",
ISBN = "1-58488-364-2",
ISBN-13 = "978-1-58488-364-7",
LCCN = "QA297 .W495 2004",
bibdate = "Tue Apr 26 09:31:54 MDT 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
z3950.loc.gov:7090/Voyager",
acknowledgement = ack-nhfb,
subject = "Numerical analysis; MATLAB; Computer interfaces;
Parallel programming (Computer science)",
}
@Article{Zeyao:2004:AMI,
author = "Mo Zeyao and Huang Zhengfeng",
title = "Application of {MPI-IO} in Parallel Particle Transport
{Monte--Carlo} Simulation",
journal = j-PARALLEL-ALGORITHMS-APPL,
volume = "19",
number = "4",
pages = "227--236",
month = "????",
year = "2004",
CODEN = "PAAPEC",
DOI = "https://doi.org/10.1080/10637190412331295166",
ISSN = "1063-7192",
ISSN-L = "1026-7689",
bibdate = "Thu Jul 10 21:46:37 MDT 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.informaworld.com/smpp/content~content=a714592658",
acknowledgement = ack-nhfb,
fjournal = "Parallel Algorithms and Applications",
journal-URL = "http://www.tandfonline.com/loi/gpaa20",
}
@Article{Zhang:2004:PMV,
author = "Xin Zhang and Lingli Ding and Elke A. Rundensteiner",
title = "Parallel multisource view maintenance",
journal = j-VLDB-J,
volume = "13",
number = "1",
pages = "22--48",
month = jan,
year = "2004",
CODEN = "VLDBFR",
DOI = "https://doi.org/10.1007/s00778-003-0086-0",
ISSN = "1066-8888 (print), 0949-877X (electronic)",
ISSN-L = "1066-8888",
bibdate = "Mon Jun 23 10:51:09 MDT 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "In a distributed environment, materialized views are
used to integrate data from different information
sources and then store them in some centralized
location. In order to maintain such materialized views,
maintenance queries need to be sent to information
sources by the data warehouse management system. Due to
the independence of the information sources and the
data warehouse, concurrency issues are raised between
the maintenance queries and the local update
transactions at each information source. Recent
solutions such as ECA and Strobe tackle such concurrent
maintenance, however with the requirement of quiescence
of the information sources. SWEEP and POSSE overcome
this limitation by decomposing the global maintenance
query into smaller subqueries to be sent to every
information source and then performing conflict
correction locally at the data warehouse. Note that all
these previous approaches handle the data updates {\em
one at a time}. Hence either some of the information
sources or the data warehouse is likely to be idle
during most of the maintenance process. In this paper,
we propose that a set of updates should be maintained
in parallel by several concurrent maintenance processes
so that both the information sources as well as the
warehouse would be utilized more fully throughout the
maintenance process. This parallelism should then
improve the overall maintenance performance. For this
we have developed a parallel view maintenance
algorithm, called PVM, that substantially improves upon
the performance of previous maintenance approaches by
handling a set of data updates at the same time. The
parallel handling of a set of updates is orthogonal to
the particular maintenance algorithm applied to the
handling of each individual update. In order to perform
parallel view maintenance, we have identified two
critical issues that must be overcome: (1) detecting
maintenance-concurrent data updates in a parallel mode
and (2) correcting the problem that the data warehouse
commit order may not correspond to the data warehouse
update processing order due to parallel maintenance
handling. In this work, we provide solutions to both
issues. For the former, we insert a middle-layer
timestamp assignment module for detecting
maintenance-concurrent data updates without requiring
any global clock synchronization. For the latter, we
introduce the negative counter concept to solve the
problem of variant orders of committing effects of data
updates to the data warehouse. We provide a proof of
the correctness of PVM that guarantees that our
strategy indeed generates the correct final data
warehouse state. We have implemented both SWEEP and PVM
in our EVE data warehousing system. Our performance
study demonstrates that a manyfold performance
improvement is achieved by PVM over SWEEP.",
acknowledgement = ack-nhfb,
fjournal = "VLDB Journal: Very Large Data Bases",
journal-URL = "http://portal.acm.org/toc.cfm?id=J869",
keywords = "concurrent data updates; data warehousing; parallel
view maintenance; performance evaluation",
}
@Article{Almasi:2005:DIM,
author = "G. Alm{\'a}si and C. Archer and J. G. Casta{\~n}os and
J. A. Gunnels and C. C. Erway and P. Heidelberger and
X. Martorell and J. E. Moreira and K. Pinnow and J.
Ratterman and B. D. Steinmacher-Burow and W. Gropp and
B. Toonen",
title = "Design and implementation of message-passing services
for the {Blue Gene/L} supercomputer",
journal = j-IBM-JRD,
volume = "49",
number = "2/3",
pages = "393--406",
month = "????",
year = "2005",
CODEN = "IBMJAE",
ISSN = "0018-8646 (print), 2151-8556 (electronic)",
ISSN-L = "0018-8646",
bibdate = "Wed Jun 1 08:14:41 MDT 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.research.ibm.com/journal/",
URL = "http://www.research.ibm.com/journal/rd/492/almasi.pdf",
abstract = "The Blue Gene/L (BG/L) supercomputer, with 65,536
dual-processor compute nodes, was designed from the
ground up to support efficient execution of massively
parallel message-passing programs. Part of this support
is an optimized implementation of the Message Passing
Interface (MPI), which leverages the hardware features
of BG/L. MPI for BG/L is implemented on top of a more
basic message-passing infrastructure called the message
layer. This message layer can be used both to implement
other higher-level libraries and directly by
applications. MPI and the message layer are used in the
two BG/L modes of operation: the coprocessor mode and
the virtual node mode. Performance measurements show
that our message-passing services deliver performance
close to the hardware limits of the machine. They also
show that dedicating one of the processors of a node to
communication functions (coprocessor mode) greatly
improves the message-passing bandwidth, whereas running
two processes per compute node (virtual node mode) can
have a positive impact on application performance.",
acknowledgement = ack-nhfb,
fjournal = "IBM Journal of Research and Development",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520",
ordernumber = "G322-0240",
}
@Article{Aversa:2005:HDS,
author = "Rocco Aversa and Beniamino {Di Martino} and Nicola
Mazzocca and Salvatore Venticinque",
title = "A hierarchical distributed-shared memory parallel
{Branch \& Bound} application with {PVM} and {OpenMP}
for multiprocessor clusters",
journal = j-PARALLEL-COMPUTING,
volume = "31",
number = "10--12",
pages = "1034--1047",
month = oct # "\slash " # dec,
year = "2005",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Thu Sep 2 17:51:04 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Aversa:2005:PPT,
author = "Rocco Aversa and Beniamino {Di Martino} and
Massimiliano Rak and Salvatore Venticinque and Umberto
Villano",
title = "Performance prediction through simulation of a hybrid
{MPI\slash OpenMP} application",
journal = j-PARALLEL-COMPUTING,
volume = "31",
number = "10--12",
pages = "1013--1033",
month = oct # "\slash " # dec,
year = "2005",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Thu Sep 2 17:51:04 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Bernaschi:2005:ERA,
author = "Massimo Bernaschi and Giulio Iannello and Saverio
Crea",
title = "Experimental Results About {MPI} Collective
Communication Operations",
journal = j-PARALLEL-PROCESS-LETT,
volume = "15",
number = "1/2",
pages = "223--236",
month = mar # "\slash " # jun,
year = "2005",
CODEN = "PPLTEE",
DOI = "https://doi.org/10.1142/S0129626405002179",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
bibdate = "Thu Sep 2 09:08:11 MDT 2010",
bibsource = "http://ejournals.wspc.com.sg/ppl/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@Article{Bhanot:2005:OTL,
author = "G. Bhanot and A. Gara and P. Heidelberger and E.
Lawless and J. C. Sexton and R. Walkup",
title = "Optimizing task layout on the {Blue Gene/L}
supercomputer",
journal = j-IBM-JRD,
volume = "49",
number = "2/3",
pages = "489--500",
month = "????",
year = "2005",
CODEN = "IBMJAE",
ISSN = "0018-8646 (print), 2151-8556 (electronic)",
ISSN-L = "0018-8646",
bibdate = "Wed Jun 1 08:14:41 MDT 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.research.ibm.com/journal/",
URL = "http://www.research.ibm.com/journal/rd/492/bhanot.pdf",
abstract = "A general method for optimizing problem layout on the
Blue Gene/L (BG/L) supercomputer is described. The
method takes as input the communication matrix of an
arbitrary problem as an array with entries $ C(i, j) $,
which represents the data communicated from domain $i$
to domain $j$. Given $ C(i, j) $, we implement a
heuristic map that attempts to sequentially map a
domain and its communication neighbors either to the
same BG/L node or to near-neighbor nodes on the BG/L
torus, while keeping the number of domains mapped to a
BG/L node constant. We then generate a Markov chain of
maps using Monte Carlo simulation with free energy $ F
= \sum_{i, j} C(i, j)H(i, j) $, where $ H(i, j) $ is
the smallest number of hops on the BG/L torus between
domain $i$ and domain $j$. For two large parallel
applications, SAGE and UMT2000, the method was tested
against the default Message Passing Interface rank
order layout on up to 2,048 BG/L nodes. It produced
maps that improved communication efficiency by up to
45\%.",
acknowledgement = ack-nhfb,
fjournal = "IBM Journal of Research and Development",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520",
ordernumber = "G322-0240",
}
@Article{Blikberg:2005:LBO,
author = "R. Blikberg and T. S{\o}revik",
title = "Load balancing and {OpenMP} implementation of nested
parallelism",
journal = j-PARALLEL-COMPUTING,
volume = "31",
number = "10--12",
pages = "984--998",
month = oct # "\slash " # dec,
year = "2005",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Thu Sep 2 17:51:04 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Brightwell:2005:AIO,
author = "Ron Brightwell and Rolf Riesen and Keith D.
Underwood",
title = "Analyzing the Impact of Overlap, Offload, and
Independent Progress for {Message Passing Interface}
Applications",
journal = j-IJHPCA,
volume = "19",
number = "2",
pages = "103--117",
month = "Summer",
year = "2005",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342005054257",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/19/2.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/19/2/103.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Chan:2005:CCI,
author = "Albert Chan and Frank Dehne and Ryan Taylor",
title = "{CGMGRAPH\slash CGMLIB}: Implementing and Testing
{CGM} Graph Algorithms on {PC} Clusters and Shared
Memory Machines",
journal = j-IJHPCA,
volume = "19",
number = "1",
pages = "81--97",
month = "Spring",
year = "2005",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342005051196",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/19/1.toc;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/19/1/81.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "https://journals.sagepub.com/home/hpc",
}
@Article{Chapman:2005:O,
author = "Barbara M. Chapman and Federico Massaioli",
title = "{OpenMP}",
journal = j-PARALLEL-COMPUTING,
volume = "31",
number = "10--12",
pages = "957--959",
month = oct # "\slash " # dec,
year = "2005",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Thu Sep 2 17:51:04 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Dalcin:2005:MP,
author = "Lisandro Dalc{\'\i}n and Rodrigo Paz and Mario
Storti",
title = "{MPI} for {Python}",
journal = j-J-PAR-DIST-COMP,
volume = "65",
number = "9",
pages = "1108--1115",
month = sep,
year = "2005",
CODEN = "JPDCER",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Fri Jul 11 20:32:33 MDT 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/07437315",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Duran:2005:RAP,
author = "A. Duran and R. Silvera and J. Corbalan and J.
Labarta",
booktitle = "Shared Memory Parallel Programming with {OpenMP}",
title = "Runtime Adjustment of Parallel Nested Loops",
journal = j-LECT-NOTES-COMP-SCI,
volume = "3349",
pages = "137--??",
year = "2005",
bibdate = "Mon Oct 07 09:29:01 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Eleftheriou:2005:SFF,
author = "M. Eleftheriou and B. G. Fitch and A. Rayshubskiy and
T. J. C. Ward and R. S. Germain",
title = "Scalable framework for {$3$D} {FFTs} on the {Blue
Gene/L} supercomputer: Implementation and early
performance measurements",
journal = j-IBM-JRD,
volume = "49",
number = "2/3",
pages = "457--464",
month = "????",
year = "2005",
CODEN = "IBMJAE",
ISSN = "0018-8646 (print), 2151-8556 (electronic)",
ISSN-L = "0018-8646",
bibdate = "Wed Jun 1 08:14:41 MDT 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.research.ibm.com/journal/",
URL = "http://www.research.ibm.com/journal/rd/492/eleftheriou.pdf",
abstract = "This paper presents results on a
communications-intensive kernel, the three-dimensional
fast Fourier transform (3D FFT), running on the
2,048-node Blue Gene/L (BG/L) prototype. Two
implementations of the volumetric FFT algorithm were
characterized, one built on the Message Passing
Interface library and another built on an active packet
Application Program Interface supported by the hardware
bring-up environment, the BG/L advanced diagnostics
environment. Preliminary performance experiments on the
BG/L prototype indicate that both of our
implementations scale well up to 1,024 nodes for $3$D
FFTs of size $ 128 \times 128 \times 128 $. The
performance of the volumetric FFT is also compared with
that of the Fastest Fourier Transform in the West
(FFTW) library. In general, the volumetric FFT
outperforms a port of the FFTW Version 2.1.5 library on
large-node-count partitions.",
acknowledgement = ack-nhfb,
fjournal = "IBM Journal of Research and Development",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520",
ordernumber = "G322-0240",
}
@Article{Florez:2005:LMM,
author = "German Florez and Zhen Liu and Susan M. Bridges and
Anthony Skjellum and Rayford B. Vaughn",
title = "Lightweight monitoring of {MPI} programs in real
time",
journal = j-CCPE,
volume = "17",
number = "13",
pages = "1547--1578",
month = nov,
year = "2005",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.889",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Tue Oct 4 06:07:02 MDT 2005",
bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Prac\-tice and
Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "24 Jun 2005",
}
@Article{Floros:2005:TGS,
author = "Evangelos Floros and Yiannis Cotronis",
title = "Towards a {Grid} Services Based Framework for the
Virtualization, Execution and Composition of {MPI}
Applications",
journal = j-PARALLEL-PROCESS-LETT,
volume = "15",
number = "1/2",
pages = "85--98",
month = mar # "\slash " # jun,
year = "2005",
CODEN = "PPLTEE",
DOI = "https://doi.org/10.1142/S0129626405002076",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
bibdate = "Thu Sep 2 09:08:11 MDT 2010",
bibsource = "http://ejournals.wspc.com.sg/ppl/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@Article{Gabriel:2005:EDC,
author = "Edgar Gabriel and Graham E. Fagg and Jack J.
Dongarra",
title = "Evaluating Dynamic Communicators and One-Sided
Operations for Current {MPI} Libraries",
journal = j-IJHPCA,
volume = "19",
number = "1",
pages = "67--79",
month = "Spring",
year = "2005",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342005051197",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/19/1.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/19/1/67.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Grove:2005:CBP,
author = "D. A. Grove and P. D. Coddington",
title = "Communication Benchmarking and Performance Modelling
of {MPI} Programs on Cluster Computers",
journal = j-J-SUPERCOMPUTING,
volume = "34",
number = "2",
pages = "201--217",
month = nov,
year = "2005",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-005-2340-2",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Wed Jul 9 17:32:26 MDT 2008",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=34&issue=2;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=34&issue=2&spage=201",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
keywords = "cluster computing; parallel computing; performance
modelling",
}
@Article{Hadjidoukas:2005:OEM,
author = "P. E. Hadjidoukas and T. S. Papatheodorou",
title = "{OpenMP} extensions for master-slave message passing
computing",
journal = j-PARALLEL-COMPUTING,
volume = "31",
number = "10--12",
pages = "1155--1167",
month = oct # "\slash " # dec,
year = "2005",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Thu Sep 2 17:51:04 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Huang:2005:TME,
author = "Lei Huang and Barbara Chapman and Zhenying Liu",
title = "Towards a more efficient implementation of {OpenMP}
for clusters via translation to global arrays",
journal = j-PARALLEL-COMPUTING,
volume = "31",
number = "10--12",
pages = "1114--1139",
month = oct # "\slash " # dec,
year = "2005",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Thu Sep 2 17:51:04 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Hurwitz:2005:AMP,
author = "Justin (Gus) Hurwitz and Wu-chun Feng",
title = "Analyzing {MPI} performance over 10-Gigabit
{Ethernet}",
journal = j-J-PAR-DIST-COMP,
volume = "65",
number = "10",
pages = "1253--1260",
month = oct,
year = "2005",
CODEN = "JPDCER",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Fri Jul 11 20:32:34 MDT 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/07437315",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Ierotheou:2005:GOC,
author = "C. S. Ierotheou and H. Jin and G. Matthews and S. P.
Johnson and R. Hood",
title = "Generating {OpenMP} code using an interactive
parallelization environment",
journal = j-PARALLEL-COMPUTING,
volume = "31",
number = "10--12",
pages = "999--1012",
month = oct # "\slash " # dec,
year = "2005",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Thu Sep 2 17:51:04 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Izaguirre:2005:PMS,
author = "Jes{\'u}s A. Izaguirre and Scott S. Hampton and
Thierry Matthey",
title = "Parallel multigrid summation for the {$N$}-body
problem",
journal = j-J-PAR-DIST-COMP,
volume = "65",
number = "8",
pages = "949--962",
month = aug,
year = "2005",
CODEN = "JPDCER",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Fri Jul 11 20:32:33 MDT 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/07437315",
abstract = "An $ \Theta (n) $ parallel multigrid summation method
(MG) for the N-body problem is presented. The method
was originally devised for vacuum boundary conditions.
Here, it is extended to periodic boundary conditions
and implemented in parallel using force decomposition
and MPI. MG is based on a hierarchical decomposition of
computational kernels on multiple grids. For low
accuracy calculations, appropriate for molecular
dynamics, a sequential implementation is as fast or
faster than particle mesh Ewald (PME). Our parallel
implementation is more scalable than PME. The method
can be combined with multiple time stepping integrators
to produce a powerful simulation protocol for
simulation of biological molecules and other materials.
The parallel implementation is tested on both a Linux
cluster with Myrinet interconnect and a shared memory
computer. It is available as open-source at
http://protomol.sourceforge.net. An auxiliary tool
allows the automatic selection of optimal parameters
for MG, and is available at
http://mdsimaid.cse.nd.edu.",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Jost:2005:WMP,
author = "G. Jost and J. Labarta and J. Gimenez",
editor = "????",
booktitle = "Shared Memory Parallel Programming with {OpenMP}",
title = "What Multilevel Parallel Programs do when you are not
watching: a Performance analysis case study comparing
{MPI\slash OpenMP}, {MLP}, and {Nested OpenMP}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "3349",
pages = "29--??",
year = "2005",
bibdate = "Mon Oct 07 09:04:25 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Jung:2005:DIM,
author = "Hyungsoo Jung and Dongin Shin and Hyuck Han and Jai W.
Kim and Heon Y. Yeom and Jongsuk Lee",
title = "Design and Implementation of Multiple Fault-Tolerant
{MPI} over {Myrinet} ({$ M^3 $})",
crossref = "ACM:2005:PAI",
pages = "32--32",
year = "2005",
bibdate = "Tue Dec 27 07:58:16 MST 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Kamal:2005:SVT,
author = "Humaira Kamal and Brad Penoff and Alan Wagner",
title = "{SCTP} versus {TCP} for {MPI}",
crossref = "ACM:2005:PAI",
pages = "30--30",
year = "2005",
bibdate = "Tue Dec 27 07:58:16 MST 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@InProceedings{Kappiah:2005:JTD,
author = "Nandini Kappiah and Vincent W. Freeh and David K.
Lowenthal",
title = "Just In Time Dynamic Voltage Scaling: Exploiting
Inter-Node Slack to Save Energy in {MPI} Programs",
crossref = "ACM:2005:PAI",
pages = "33--33",
year = "2005",
bibdate = "Tue Dec 27 07:58:16 MST 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Karwande:2005:MPC,
author = "Amit Karwande and Xin Yuan and David K. Lowenthal",
title = "An {MPI} prototype for compiled communication on
{Ethernet} switched clusters",
journal = j-J-PAR-DIST-COMP,
volume = "65",
number = "10",
pages = "1123--1133",
month = oct,
year = "2005",
CODEN = "JPDCER",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Fri Jul 11 20:32:34 MDT 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/07437315",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Misc{Kepner:2005:PPM,
author = "Jeremy Kepner",
title = "Parallel Programming with {MatlabMPI}",
howpublished = "World-Wide Web site.",
year = "2005",
bibdate = "Mon Dec 05 08:36:15 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.ll.mit.edu/MatlabMPI/",
abstract = "MatlabMPI is a set of Matlab scripts that implement a
subset of MPI and allow any Matlab program to be run on
a parallel computer. The key innovation of MatlabMPI is
that it implements the widely used MPI ``look and
feel'' on top of standard Matlab file i/o, resulting in
a ``pure'' Matlab implementation that is exceedingly
small (about 300 lines of code). Thus, MatlabMPI will
run on any combination of computers that Matlab
supports. In addition, because of its small size, it is
simple to download and use (and modify if you like).",
acknowledgement = ack-nhfb,
keywords = "Matlab; MatlabMPI; MPI; parallel processing",
}
@Article{Kranzlmuller:2005:RAP,
author = "Dieter Kranzlm{\"u}ller and Peter Kacsuk and Jack
Dongarra",
title = "Recent Advances in {Parallel Virtual Machine} and
{Message Passing Interface}",
journal = j-IJHPCA,
volume = "19",
number = "2",
pages = "99--101",
month = "Summer",
year = "2005",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342005054256",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/19/2.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/19/2/99.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Liu:2005:EIO,
author = "Z. Liu and L. Huang and B. Chapman and T. Weng",
booktitle = "Shared Memory Parallel Programming with {OpenMP}",
title = "Efficient Implementation of {OpenMP} for Clusters with
Implicit Data Distribution",
journal = j-LECT-NOTES-COMP-SCI,
volume = "3349",
pages = "121--??",
year = "2005",
bibdate = "Mon Oct 07 09:16:10 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Marowka:2005:EMT,
author = "Ami Marowka",
title = "Execution model of three parallel languages: {OpenMP},
{UPC} and {CAF}",
journal = j-SCI-PROG,
volume = "13",
number = "2",
pages = "127--135",
month = "????",
year = "2005",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Wed Sep 1 14:50:28 MDT 2010",
bibsource = "http://www.iospress.nl/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@article{Martorell:2005:BGP,
  author          = {X. Martorell and N. Smeds and R. Walkup and J. R.
                     Brunheroto and G. Alm{\'a}si and J. A. Gunnels and L.
                     DeRose and J. Labarta and F. Escal{\'e} and J.
                     Gim{\'e}nez and H. Servat and J. E. Moreira},
  title           = {{Blue Gene/L} performance tools},
  journal         = j-IBM-JRD,
  volume          = {49},
  number          = {2/3},
  pages           = {407--424},
  month           = {????},
  year            = {2005},
  coden           = {IBMJAE},
  issn            = {0018-8646 (print), 2151-8556 (electronic)},
  issn-l          = {0018-8646},
  bibdate         = {Wed Jun 1 08:14:41 MDT 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.research.ibm.com/journal/},
  url             = {http://www.research.ibm.com/journal/rd/492/martorell.pdf},
  abstract        = {Good performance monitoring is the basis of modern
                     performance analysis tools for application
                     optimization. We are providing a variety of such
                     performance analysis tools for the new Blue Gene/L
                     supercomputer. Those tools can be divided into two
                     categories: single-node performance tools and multinode
                     performance tools. From a single-node perspective, we
                     provide standard interfaces and libraries, such as PAPI
                     and libHPM, that provide access to the hardware
                     performance counters for applications running on the
                     Blue Gene/L compute nodes. From a multinode
                     perspective, we focus on tools that analyze Message
                     Passing Interface (MPI) behavior. Those tools work by
                     first collecting message-passing trace data when a
                     program runs. The trace data is then used by graphical
                     interface tools that analyze the behavior of
                     applications. Using the current prototype tools, we
                     demonstrate their usefulness and applicability with
                     case studies of application optimization.},
  acknowledgement = ack-nhfb,
  fjournal        = {IBM Journal of Research and Development},
  journal-url     = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520},
  ordernumber     = {G322-0240},
}
@article{Massaioli:2005:OPA,
  author          = {Federico Massaioli and Filippo Castiglione and Massimo
                     Bernaschi},
  title           = {{OpenMP} parallelization of agent-based models},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {31},
  number          = {10--12},
  pages           = {1066--1081},
  month           = oct # {\slash } # dec,
  year            = {2005},
  coden           = {PACOEJ},
  issn            = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l          = {0167-8191},
  bibdate         = {Thu Sep 2 17:51:04 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-url     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Book{Mattson:2005:PPP,
author = "Timothy G. Mattson and Beverly A. Sanders and Berna
Massingill",
title = "Patterns for Parallel Programming",
publisher = pub-AW,
address = pub-AW:adr,
pages = "xiii + 355",
year = "2005",
ISBN = "0-321-22811-1 (hardcover)",
ISBN-13 = "978-0-321-22811-6 (hardcover)",
LCCN = "QA76.642 .M38 2005",
bibdate = "Sat Oct 5 10:09:33 MDT 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/java2000.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
z3950.loc.gov:7090/Voyager",
URL = "http://www.loc.gov/catdir/toc/ecip0418/2004013240.html",
abstract = "The Parallel Programming Guide for Every Software
Developer. From grids and clusters to next-generation
game consoles, parallel computing is going mainstream.
Innovations such as Hyper-Threading Technology,
HyperTransport Technology, and multicore
microprocessors from IBM, Intel, and Sun are
accelerating the movement's growth. Only one thing is
missing: programmers with the skills to meet the
soaring demand for parallel software. That's where
Patterns for Parallel Programming comes in. It's the
first parallel programming guide written specifically
to serve working software developers, not just computer
scientists. The authors introduce a complete, highly
accessible pattern language that will help any
experienced developer ``think parallel''---and start
writing effective parallel code almost immediately.
Instead of formal theory, they deliver proven solutions
to the challenges faced by parallel programmers, and
pragmatic guidance for using today's parallel APIs in
the real world. Coverage includes: Understanding the
parallel computing landscape and the challenges faced
by parallel developers; Finding the concurrency in a
software design problem and decomposing it into
concurrent tasks; Managing the use of data across tasks;
Creating an algorithm structure that effectively
exploits the concurrency you've identified; Connecting
your algorithmic structures to the APIs needed to
implement them; Specific software constructs for
implementing parallel programs; Working with today's
leading parallel programming environments: OpenMP, MPI,
and Java. Patterns have helped thousands of programmers
master object-oriented development and other complex
programming technologies. With this book, you will
learn that they're the best way to master parallel
programming too.",
acknowledgement = ack-nhfb,
author-dates = "1958--",
subject = "Parallel programming (Computer science)",
tableofcontents = "1: A pattern language for parallel programming \\
2: Background and jargon of parallel computing \\
3: The finding concurrency design space \\
4: The algorithm structure design space \\
5: The supporting structures design space \\
6: The implementation mechanisms design space \\
Appendix A: A brief introduction to OpenMP \\
Appendix B: A brief introduction to MPI \\
Appendix C: A brief introduction to concurrent
programming in Java",
}
@inproceedings{Mavriplis:2005:HRAa,
  author          = {Dimitri J. Mavriplis and Michael J. Aftosmis and
                     Marsha Berger},
  title           = {High Resolution Aerospace Applications using the {NASA
                     Columbia Supercomputer}},
  crossref        = {ACM:2005:PAI},
  pages           = {61--61},
  year            = {2005},
  bibdate         = {Tue Dec 27 07:58:16 MST 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract        = {This paper focuses on the parallel performance of two
                     high-performance aerodynamic simulation packages on the
                     newly installed NASA Columbia supercomputer. These
                     packages include both a high-fidelity, unstructured,
                     Reynolds-averaged Navier--Stokes solver, and a
                     fully-automated inviscid flow package for cut-cell
                     Cartesian grids. The complementary combination of these
                     two simulation codes enables high-fidelity
                     characterization of aerospace vehicle design
                     performance over the entire flight envelope through
                     extensive parametric analysis and detailed simulation
                     of critical regions of the flight envelope. Both
                     packages are industrial-level codes designed for
                     complex geometry and incorporate customized multigrid
                     solution algorithms. The performance of these codes on
                     Columbia is examined using both MPI and OpenMP and
                     using both the NUMAlink and InfiniBand interconnect
                     fabrics. Numerical results demonstrate good scalability
                     on up to 2016 cpus using the NUMAlink4 interconnect,
                     with measured computational rates in the vicinity of 3
                     TFLOP/s, while InfiniBand showed some performance
                     degradation at high CPU counts, particularly with
                     multigrid. Nonetheless, the results are encouraging
                     enough to indicate that larger test cases using
                     combined MPI/OpenMP communication should scale well on
                     even more processors.},
  acknowledgement = ack-nhfb,
  remark          = {Co-winner of best paper award. Also published in
                     \cite{Mavriplis:2007:HRAb}.},
}
@article{Medvedev:2005:OMA,
  author          = {Dmitry M. Medvedev and Evelyn M. Goldfield and Stephen
                     K. Gray},
  title           = {An {OpenMP\slash MPI} approach to the parallelization
                     of iterative four-atom quantum mechanics},
  journal         = j-COMP-PHYS-COMM,
  volume          = {166},
  number          = {2},
  pages           = {94--108},
  day             = {1},
  month           = mar,
  year            = {2005},
  coden           = {CPHCBZ},
  doi             = {https://doi.org/10.1016/j.cpc.2004.11.001},
  issn            = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l          = {0010-4655},
  bibdate         = {Mon Feb 13 23:41:51 MST 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://www.sciencedirect.com/science/article/pii/S0010465504005260},
  acknowledgement = ack-nhfb,
  fjournal        = {Computer Physics Communications},
  journal-url     = {http://www.sciencedirect.com/science/journal/00104655},
}
@article{Midorikawa:2005:PNM,
  author          = {Edson Toshimi Midorikawa and Helio Marci Oliveira and
                     Jean Marcos Laine},
  title           = {{PEMPIs}: a New Methodology for Modeling and
                     Prediction of {MPI} Programs Performance},
  journal         = j-INT-J-PARALLEL-PROG,
  volume          = {33},
  number          = {5},
  pages           = {499--527},
  month           = oct,
  year            = {2005},
  coden           = {IJPPE5},
  doi             = {https://doi.org/10.1007/s10766-005-7303-y},
  issn            = {0885-7458 (print), 1573-7640 (electronic)},
  issn-l          = {0885-7458},
  bibdate         = {Wed Jul 9 16:05:39 MDT 2008},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=33&issue=5;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=33&issue=5&spage=499},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of Parallel Programming},
  journal-url     = {http://link.springer.com/journal/10766},
  keywords        = {analytical modeling; graphical models; message
                     passing; MPI; Performance prediction},
}
@Article{Nagle:2005:BRM,
author = "Dan Nagle",
title = "Book Review: {{\em MPI --- The Complete Reference,
Vol. 1, The MPI Core}, 2nd ed., Scientific and
Engineering Computation Series, by Marc Snir, Steve
Otto, Steven Huss-Lederman, David Walker and Jack
Dongarra}",
journal = j-SCI-PROG,
volume = "13",
number = "1",
pages = "57--63",
month = "????",
year = "2005",
CODEN = "SCIPEV",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Wed Sep 1 14:50:28 MDT 2010",
bibsource = "http://www.iospress.nl/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@article{Nakajima:2005:PIS,
  author          = {Kengo Nakajima},
  title           = {Parallel iterative solvers for finite-element methods
                     using an {OpenMP\slash MPI} hybrid programming model on
                     the {Earth Simulator}},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {31},
  number          = {10--12},
  pages           = {1048--1065},
  month           = oct # {\slash } # dec,
  year            = {2005},
  coden           = {PACOEJ},
  issn            = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l          = {0167-8191},
  bibdate         = {Thu Sep 2 17:51:04 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-url     = {http://www.sciencedirect.com/science/journal/01678191},
}
@article{Nakajima:2005:TLH,
  author          = {Kengo Nakajima},
  title           = {Three-level hybrid vs. flat {MPI} on the {Earth
                     Simulator}: Parallel iterative solvers for
                     finite-element method},
  journal         = j-APPL-NUM-MATH,
  volume          = {54},
  number          = {2},
  pages           = {237--255},
  month           = jul,
  year            = {2005},
  coden           = {ANMAEL},
  issn            = {0168-9274 (print), 1873-5460 (electronic)},
  issn-l          = {0168-9274},
  bibdate         = {Tue Aug 24 11:17:20 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.sciencedirect.com/science/journal/01689274},
  acknowledgement = ack-nhfb,
  fjournal        = {Applied Numerical Mathematics: Transactions of IMACS},
  journal-url     = {http://www.sciencedirect.com/science/journal/01689274},
}
@article{Norcen:2005:HPJ,
  author          = {Roland Norcen and Andreas Uhl},
  title           = {High performance {JPEG 2000} and {MPEG-4 VTC} on
                     {SMPs} using {OpenMP}},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {31},
  number          = {10--12},
  pages           = {1082--1098},
  month           = oct # {\slash } # dec,
  year            = {2005},
  coden           = {PACOEJ},
  issn            = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l          = {0167-8191},
  bibdate         = {Thu Sep 2 17:51:04 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-url     = {http://www.sciencedirect.com/science/journal/01678191},
}
@article{Orlando:2005:PSP,
  author          = {Salvatore Orlando and Domenico Laforenza},
  title           = {Preface: Selected Papers from the {EUROPVM\slash MPI
                     2003 Conference, Venice, Italy, 29 September--2 October
                     2003}},
  journal         = j-IJHPCA,
  volume          = {19},
  number          = {1},
  pages           = {47--47},
  month           = {Spring},
  year            = {2005},
  coden           = {IHPCFL},
  doi             = {https://doi.org/10.1177/1094342005051520},
  issn            = {1094-3420 (print), 1741-2846 (electronic)},
  issn-l          = {1094-3420},
  bibdate         = {Tue Aug 31 09:59:45 MDT 2010},
  bibsource       = {http://hpc.sagepub.com/content/19/1.toc;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://hpc.sagepub.com/content/19/1/47.full.pdf+html},
  acknowledgement = ack-nhfb,
  journal-url     = {http://hpc.sagepub.com/content/by/year},
}
@article{Park:2005:SOA,
  author          = {Inho Park and Seon Wook Kim},
  title           = {Study of {OpenMP} applications on the
                     {InfiniBand}-based software distributed shared-memory
                     system},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {31},
  number          = {10--12},
  pages           = {1099--1113},
  month           = oct # {\slash } # dec,
  year            = {2005},
  coden           = {PACOEJ},
  issn            = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l          = {0167-8191},
  bibdate         = {Thu Sep 2 17:51:04 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-url     = {http://www.sciencedirect.com/science/journal/01678191},
}
@book{Pharr:2005:GGP,
  editor          = {Matt Pharr and Randima Fernando},
  title           = {{GPU} gems 2: programming techniques for
                     high-performance graphics and general-purpose
                     computation},
  volume          = {2},
  publisher       = pub-AW,
  address         = pub-AW:adr,
  pages           = {xlix + 814},
  year            = {2005},
  isbn            = {0-321-33559-7 (hardcover)},
  isbn-13         = {978-0-321-33559-3 (hardcover)},
  lccn            = {T385 .G688 2005},
  bibdate         = {Thu Jul 29 13:36:54 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/numana2000.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/unix.bib;
                     z3950.loc.gov:7090/Voyager},
  series          = {GPU gems},
  url             = {http://www-docs.tu-cottbus.de/bibliothek/public/katalog/420569.PDF;
                     http://www.loc.gov/catdir/toc/ecip055/2004030181.html},
  abstract        = {This sequel to the best-selling, first volume of GPU
                     Gems details the latest programming techniques for
                     today's graphics processing units (GPUs). As GPUs find
                     their way into mobile phones, handheld gaming devices,
                     and consoles, GPU expertise is even more critical in
                     today's competitive environment. Real-time graphics
                     programmers will discover the latest algorithms for
                     creating advanced visual effects, strategies for
                     managing complex scenes, and techniques for advanced
                     image processing. Readers will also learn new methods
                     for using the substantial processing power of the GPU
                     in other computationally intensive applications, such
                     as scientific computing and finance. Twenty of the
                     book's forty-eight chapters are devoted to GPGPU
                     programming, from basic concepts to advanced
                     techniques. Written by experts in cutting-edge GPU
                     programming, this book offers readers practical means
                     to harness the enormous capabilities of GPUs.},
  acknowledgement = ack-nhfb,
  keywords        = {CUDA; nVIDIA},
  remark          = {CD-ROM contents: Complementary examples and samples.},
}
@InProceedings{Pjesivac-Grbovic:2005:PAM,
author = "J. Pjesivac-Grbovic and T. Angskun and G. Bosilca and
G. E. Fagg and E. Gabriel and J. J. Dongarra",
title = "Performance Analysis of {MPI} Collective Operations",
crossref = "IEEE:2005:IPD",
pages = "272a--272a",
year = "2005",
bibdate = "Fri May 27 10:13:34 2005",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Article{Rantakokko:2005:DMO,
author = "Jarmo Rantakokko",
title = "A Dynamic {MPI--OpenMP} Model for Structured Adaptive
Mesh Refinement",
journal = j-PARALLEL-PROCESS-LETT,
volume = "15",
number = "1/2",
pages = "37--47",
month = mar # "\slash " # jun,
year = "2005",
CODEN = "PPLTEE",
DOI = "https://doi.org/10.1142/S0129626405002040",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
ISSN-L = "0129-6264",
bibdate = "Thu Sep 2 09:08:11 MDT 2010",
bibsource = "http://ejournals.wspc.com.sg/ppl/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@article{Roberti:2005:PIL,
  author          = {Debora R. Roberti and Roberto P. Souto and Haroldo F.
                     Campos Velho and Gervasio A. Degrazia and Domenico
                     Anfossi},
  title           = {Parallel Implementation of a {Lagrangian} Stochastic
                     Model for Pollutant Dispersion},
  journal         = j-INT-J-PARALLEL-PROG,
  volume          = {33},
  number          = {5},
  pages           = {485--498},
  month           = oct,
  year            = {2005},
  coden           = {IJPPE5},
  doi             = {https://doi.org/10.1007/s10766-005-7302-z},
  issn            = {0885-7458 (print), 1573-7640 (electronic)},
  issn-l          = {0885-7458},
  bibdate         = {Wed Jul 9 16:05:39 MDT 2008},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=33&issue=5;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=33&issue=5&spage=485},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of Parallel Programming},
  journal-url     = {http://link.springer.com/journal/10766},
  keywords        = {High performance application; MPI; pollutant
                     dispersion},
}
@Article{Rufai:2005:MPO,
author = "Raimi Rufai and Muslim Bozyigit and Jaralla Alghamdi
and Moataz Ahmed",
title = "Multithreaded Parallelism with {OpenMP}",
journal = j-PARALLEL-PROCESS-LETT,
volume = "15",
number = "4",
pages = "367--378",
month = dec,
year = "2005",
CODEN = "PPLTEE",
DOI = "https://doi.org/10.1142/S0129626405002283",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
ISSN-L = "0129-6264",
bibdate = "Thu Sep 2 09:08:11 MDT 2010",
bibsource = "http://ejournals.wspc.com.sg/ppl/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@article{Sankaran:2005:LMC,
  author          = {Sriram Sankaran and Jeffrey M. Squyres and Brian
                     Barrett and Vishal Sahay and Andrew Lumsdaine and Jason
                     Duell and Paul Hargrove and Eric Roman},
  title           = {The {LAM\slash MPI} Checkpoint\slash Restart
                     Framework: System-Initiated Checkpointing},
  journal         = j-IJHPCA,
  volume          = {19},
  number          = {4},
  pages           = {479--493},
  month           = {Winter},
  year            = {2005},
  coden           = {IHPCFL},
  doi             = {https://doi.org/10.1177/1094342005056139},
  issn            = {1094-3420 (print), 1741-2846 (electronic)},
  issn-l          = {1094-3420},
  bibdate         = {Tue Aug 31 09:59:45 MDT 2010},
  bibsource       = {http://hpc.sagepub.com/content/19/4.toc;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://hpc.sagepub.com/content/19/4/479.full.pdf+html},
  acknowledgement = ack-nhfb,
  journal-url     = {http://hpc.sagepub.com/content/by/year},
}
@article{Santhanaraman:2005:DZC,
  author          = {Gopalakrishnan Santhanaraman and Jiesheng Wu and Wei
                     Huang and Dhabaleswar K. Panda},
  title           = {Designing Zero-Copy {Message Passing Interface}
                     Derived Datatype Communication Over {Infiniband}:
                     Alternative Approaches and Performance Evaluation},
  journal         = j-IJHPCA,
  volume          = {19},
  number          = {2},
  pages           = {129--142},
  month           = {Summer},
  year            = {2005},
  coden           = {IHPCFL},
  doi             = {https://doi.org/10.1177/1094342005054259},
  issn            = {1094-3420 (print), 1741-2846 (electronic)},
  issn-l          = {1094-3420},
  bibdate         = {Tue Aug 31 09:59:45 MDT 2010},
  bibsource       = {http://hpc.sagepub.com/content/19/2.toc;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://hpc.sagepub.com/content/19/2/129.full.pdf+html},
  acknowledgement = ack-nhfb,
  journal-url     = {http://hpc.sagepub.com/content/by/year},
}
@article{Selikhov:2005:CMB,
  author          = {A. Selikhov and C. Germain},
  title           = {A {Channel Memory} based fault tolerance for {MPI}
                     applications},
  journal         = j-FUT-GEN-COMP-SYS,
  volume          = {21},
  number          = {5},
  pages           = {709--715},
  month           = may,
  year            = {2005},
  coden           = {FGSEVI},
  issn            = {0167-739X (print), 1872-7115 (electronic)},
  issn-l          = {0167-739X},
  bibdate         = {Fri Jul 15 08:00:46 MDT 2005},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.sciencedirect.com/science/journal/0167739X},
  acknowledgement = ack-nhfb,
  fjournal        = {Future Generation Computer Systems},
  journal-url     = {http://www.sciencedirect.com/science/journal/0167739X},
}
@Book{Sloan:2005:HPL,
author = "Joseph D. Sloan",
title = "High performance {Linux} clusters with {OSCAR},
{Rocks}, {openMosix}, and {MPI}",
publisher = pub-ORA,
address = pub-ORA:adr,
pages = "xv + 350",
year = "2005",
ISBN = "0-596-00570-9",
ISBN-13 = "978-0-596-00570-2",
LCCN = "QA76.58; QA76.58 .S56 2005eb; QA76.58 .S56 2005;
QA76.58 .S58 2005; QA76.58 .S595 2005",
bibdate = "Tue Aug 5 17:41:39 MDT 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
melvyl.cdlib.org:210/CDL90",
URL = "http://www.oreilly.com/catalog/9780596005702",
acknowledgement = ack-nhfb,
subject = "Linux; Parallel processing (Electronic computers);
Electronic data processing; Distributed processing",
}
@article{Thakur:2005:OCC,
  author          = {Rajeev Thakur and Rolf Rabenseifner and William
                     Gropp},
  title           = {Optimization of Collective Communication Operations in
                     {MPICH}},
  journal         = j-IJHPCA,
  volume          = {19},
  number          = {1},
  pages           = {49--66},
  month           = {Spring},
  year            = {2005},
  coden           = {IHPCFL},
  doi             = {https://doi.org/10.1177/1094342005051521},
  issn            = {1094-3420 (print), 1741-2846 (electronic)},
  issn-l          = {1094-3420},
  bibdate         = {Tue Aug 31 09:59:45 MDT 2010},
  bibsource       = {http://hpc.sagepub.com/content/19/1.toc;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://hpc.sagepub.com/content/19/1/49.full.pdf+html},
  acknowledgement = ack-nhfb,
  journal-url     = {http://hpc.sagepub.com/content/by/year},
}
@article{Thakur:2005:OSO,
  author          = {Rajeev Thakur and William Gropp and Brian Toonen},
  title           = {Optimizing the Synchronization Operations in {Message
                     Passing Interface} One-Sided Communication},
  journal         = j-IJHPCA,
  volume          = {19},
  number          = {2},
  pages           = {119--128},
  month           = {Summer},
  year            = {2005},
  coden           = {IHPCFL},
  doi             = {https://doi.org/10.1177/1094342005054258},
  issn            = {1094-3420 (print), 1741-2846 (electronic)},
  issn-l          = {1094-3420},
  bibdate         = {Tue Aug 31 09:59:45 MDT 2010},
  bibsource       = {http://hpc.sagepub.com/content/19/2.toc;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://hpc.sagepub.com/content/19/2/119.full.pdf+html},
  acknowledgement = ack-nhfb,
  journal-url     = {http://hpc.sagepub.com/content/by/year},
}
@article{Tian:2005:CEN,
  author          = {Xinmin Tian and Jay P. Hoeflinger and Grant Haab and
                     Yen-Kuang Chen and Milind Girkar and Sanjiv Shah},
  title           = {A compiler for exploiting nested parallelism in
                     {OpenMP} programs},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {31},
  number          = {10--12},
  pages           = {960--983},
  month           = oct # {\slash } # dec,
  year            = {2005},
  coden           = {PACOEJ},
  issn            = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l          = {0167-8191},
  bibdate         = {Thu Sep 2 17:51:04 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-url     = {http://www.sciencedirect.com/science/journal/01678191},
}
@article{Tian:2005:PCT,
  author          = {Xinmin Tian and Milind Girkar and Aart Bik and Hideki
                     Saito},
  title           = {Practical Compiler Techniques on Efficient
                     Multithreaded Code Generation for {OpenMP} Programs},
  journal         = j-COMP-J,
  volume          = {48},
  number          = {5},
  pages           = {588--601},
  month           = sep,
  year            = {2005},
  coden           = {CMPJA6},
  doi             = {https://doi.org/10.1093/comjnl/bxh109},
  issn            = {0010-4620 (print), 1460-2067 (electronic)},
  issn-l          = {0010-4620},
  bibdate         = {Tue Nov 8 05:58:50 MST 2005},
  bibsource       = {http://comjnl.oxfordjournals.org/content/vol48/issue5/index.dtl;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://comjnl.oxfordjournals.org/cgi/content/abstract/48/5/588;
                     http://comjnl.oxfordjournals.org/cgi/reprint/48/5/588},
  acknowledgement = ack-nhfb,
  fjournal        = {The Computer Journal},
  journal-url     = {http://comjnl.oxfordjournals.org/},
}
@InProceedings{Wiese:2005:IPN,
author = "Kay C. Wiese and Andrew Hendriks and Alain Deschenes
and Belgacem {Ben Youssef}",
title = "The Impact of Pseudorandom Number Quality on
{P-RnaPredict}, a Parallel Genetic Algorithm for {RNA}
Secondary Structure Prediction",
crossref = "Beyer:2005:GEC",
pages = "479--480",
year = "2005",
DOI = "https://doi.org/10.1145/1068009.1068089",
bibdate = "Mon Mar 5 22:02:35 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/prng.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.cs.bham.ac.uk/~wbl/biblio/gecco2005lbp/papers/52-wiese.pdf",
abstract = "We present a parallel version of RnaPredict, a genetic
algorithm (GA) for RNA secondary structure prediction.
The research presented here builds on previous work and
examines the impact of three different pseudorandom
number generators (PRNGs) on the GA's performance. The
three generators tested are the C standard library PRNG
RAND, a parallelised multiplicative congruential
generator (MCG), and a parallelized Mersenne Twister
(MT). A fully parallel version of RnaPredict using the
Message Passing Interface (MPI) was implemented. The
PRNG comparison tests were performed with known
structures that are 118, 122, 543, and 556 nucleotides
in length. The effects of the PRNGs are investigated
and the predicted structures are compared to known
structures.",
acknowledgement = ack-nhfb,
}
@article{Willcock:2005:UMC,
  author          = {Jeremiah Willcock and Andrew Lumsdaine and Arch
                     Robison},
  title           = {Using {MPI} with {C\#} and the {Common Language
                     Infrastructure}},
  journal         = j-CCPE,
  volume          = {17},
  number          = {7--8},
  pages           = {895--917},
  month           = jun # {\slash } # jul,
  year            = {2005},
  coden           = {CCPEBO},
  doi             = {https://doi.org/10.1002/cpe.861},
  issn            = {1532-0626 (print), 1532-0634 (electronic)},
  issn-l          = {1532-0626},
  bibdate         = {Sat May 14 11:30:57 MDT 2005},
  bibsource       = {http://www.interscience.wiley.com/jpages/1532-0626;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www3.interscience.wiley.com/journalfinder.html},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Prac\-tice and
                     Experience},
  journal-url     = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate      = {23 Feb 2005},
}
@article{Yu:2005:HPB,
  author          = {Weikuan Yu and Sayantan Sur and Dhabaleswar K. Panda
                     and Rob T. Aulwes and Rich L. Graham},
  title           = {High Performance Broadcast Support in {LA-MPI} Over
                     Quadrics},
  journal         = j-IJHPCA,
  volume          = {19},
  number          = {4},
  pages           = {453--463},
  month           = {Winter},
  year            = {2005},
  coden           = {IHPCFL},
  doi             = {https://doi.org/10.1177/1094342005056145},
  issn            = {1094-3420 (print), 1741-2846 (electronic)},
  issn-l          = {1094-3420},
  bibdate         = {Tue Aug 31 09:59:45 MDT 2010},
  bibsource       = {http://hpc.sagepub.com/content/19/4.toc;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://hpc.sagepub.com/content/19/4/453.full.pdf+html},
  acknowledgement = ack-nhfb,
  journal-url     = {http://hpc.sagepub.com/content/by/year},
}
@article{Zhang:2005:ULC,
  author          = {Youhui Zhang and Dongsheng Wong and Weimin Zheng},
  title           = {User-level checkpoint and recovery for {LAM\slash
                     MPI}},
  journal         = j-OPER-SYS-REV,
  volume          = {39},
  number          = {3},
  pages           = {72--81},
  month           = jul,
  year            = {2005},
  coden           = {OSRED8},
  issn            = {0163-5980 (print), 1943-586X (electronic)},
  issn-l          = {0163-5980},
  bibdate         = {Sat Aug 26 08:55:48 MDT 2006},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Operating Systems Review},
}
@Article{Zheng:2005:SBP,
author = "Gengbin Zheng and Terry Wilmarth and Praveen
Jagadishprasad and Laxmikant V. Kal{\'e}",
title = "Simulation-Based Performance Prediction for Large
Parallel Machines",
journal = j-INT-J-PARALLEL-PROG,
volume = "33",
number = "2--3",
pages = "183--207",
month = jun,
year = "2005",
CODEN = "IJPPE5",
DOI = "https://doi.org/10.1007/s10766-005-3582-6",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Wed Jul 9 16:05:27 MDT 2008",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=33&issue=2;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=33&issue=2&spage=183",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
keywords = "adaptive MPI; CHARM++; computation modeling; large
parallel machines; Simulation-based performance
prediction",
}
@article{Ayguade:2006:ENO,
  author          = {Eduard Ayguade and Marc Gonzalez and Xavier Martorell
                     and Gabriele Jost},
  title           = {Employing nested {OpenMP} for the parallelization of
                     multi-zone computational fluid dynamics applications},
  journal         = j-J-PAR-DIST-COMP,
  volume          = {66},
  number          = {5},
  pages           = {686--697},
  month           = may,
  year            = {2006},
  coden           = {JPDCER},
  issn            = {0743-7315 (print), 1096-0848 (electronic)},
  issn-l          = {0743-7315},
  bibdate         = {Fri Jul 11 20:32:34 MDT 2008},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.sciencedirect.com/science/journal/07437315},
  acknowledgement = ack-nhfb,
  fjournal        = {Journal of Parallel and Distributed Computing},
  journal-url     = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Barton:2006:SMP,
author = "Christopher Barton and C{\u{a}}lin Cas{\c{c}}aval and
George Alm{\'a}si and Yili Zheng and Montse Farreras
and Siddhartha Chatterjee and Jos{\'e} Nelson Amaral",
title = "Shared memory programming for large scale machines",
journal = j-SIGPLAN,
volume = "41",
number = "6",
pages = "108--117",
month = jun,
year = "2006",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1133981.1133995",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jun 18 10:42:48 MDT 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "This paper describes the design and implementation of
a scalable run-time system and an optimizing compiler
for Unified Parallel C (UPC). An experimental
evaluation on BlueGene/L{\reg}, a distributed-memory
machine, demonstrates that the combination of the
compiler with the runtime system produces programs with
performance comparable to that of efficient MPI
programs and good performance scalability up to
hundreds of thousands of processors. Our runtime system
design solves the problem of maintaining shared object
consistency efficiently in a distributed memory
machine. Our compiler infrastructure simplifies the
code generated for parallel loops in UPC through the
elimination of affinity tests, eliminates several
levels of indirection for accesses to segments of
shared arrays that the compiler can prove to be local,
and implements remote update operations through a
lower-cost asynchronous message. The performance
evaluation uses three well-known benchmarks --- HPC
RandomAccess, HPC STREAM and NAS CG --- to obtain
scaling and absolute performance numbers for these
benchmarks on up to 131072 processors, the full
BlueGene/L machine. These results were used to win the
HPC Challenge Competition at SC05 in Seattle WA,
demonstrating that PGAS languages support both
productivity and performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "BlueGene; PGAS programming model; UPC",
}
@article{Battre:2006:MFP,
  author          = {Dominic Battr{\'e} and David Sigfredo Angulo},
  title           = {{MPI} framework for parallel searching in large
                     biological databases},
  journal         = j-J-PAR-DIST-COMP,
  volume          = {66},
  number          = {12},
  pages           = {1503--1511},
  month           = dec,
  year            = {2006},
  coden           = {JPDCER},
  issn            = {0743-7315 (print), 1096-0848 (electronic)},
  issn-l          = {0743-7315},
  bibdate         = {Fri Jul 11 20:32:35 MDT 2008},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.sciencedirect.com/science/journal/07437315},
  acknowledgement = ack-nhfb,
  fjournal        = {Journal of Parallel and Distributed Computing},
  journal-url     = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Becciani:2006:FMP,
  author =       "U. Becciani and M. Comparato and V.
                 Antonuccio-Delogu",
  title =        "{FLY MPI-2}: a parallel tree code for {LSS}",
  journal =      j-COMP-PHYS-COMM,
  volume =       "174",
  number =       "7",
  pages =        "605--606",
  day =          "1",
  month =        apr,
  year =         "2006",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2006.01.002",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Tue Feb 14 14:46:14 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib;
                 http://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465506000713",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Bouteiller:2006:HPS,
  author =       "Aur{\'e}lien Bouteiller and Hinde-Lilia Bouziane and
                 Thomas Herault and Pierre Lemarinier and Franck
                 Cappello",
  title =        "Hybrid Preemptive Scheduling of {Message Passing
                 Interface} Applications on {Grids}",
  journal =      j-IJHPCA,
  volume =       "20",
  number =       "1",
  pages =        "77--90",
  month =        "Spring",
  year =         "2006",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342006062526",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Tue Aug 31 09:59:45 MDT 2010",
  bibsource =    "http://hpc.sagepub.com/content/20/1.toc;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://hpc.sagepub.com/content/20/1/77.full.pdf+html",
  acknowledgement = ack-nhfb,
  fjournal =     "The International Journal of High Performance
                 Computing Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}
@Article{Bouteiller:2006:MVP,
  author =       "A. Bouteiller and T. Herault and G. Krawezik and P.
                 Lemarinier and F. Cappello",
  title =        "{MPICH-V} Project: a Multiprotocol Automatic
                 Fault-Tolerant {MPI}",
  journal =      j-IJHPCA,
  volume =       "20",
  number =       "3",
  pages =        "319--333",
  month =        "Fall",
  year =         "2006",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342006067469",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Tue Aug 31 09:59:45 MDT 2010",
  bibsource =    "http://hpc.sagepub.com/content/20/3.toc;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://hpc.sagepub.com/content/20/3/319.full.pdf+html",
  acknowledgement = ack-nhfb,
  fjournal =     "The International Journal of High Performance
                 Computing Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}
@Article{Costa:2006:ROA,
  author =       "J. J. Costa and T. Cortes and X. Martorell and E.
                 Ayguad{\'e} and J. Labarta",
  title =        "Running {OpenMP} applications efficiently on an
                 everything-shared {SDSM}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "66",
  number =       "5",
  pages =        "647--658",
  month =        may,
  year =         "2006",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Jul 11 20:32:34 MDT 2008",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@InProceedings{delCuvillo:2006:LOC,
  author =       "Juan del Cuvillo and Weirong Zhu and Guang Gao",
  title =        "Landing {OpenMP} on {Cyclops-64}: an efficient mapping
                 of {OpenMP} to a many-core system-on-a-chip",
  crossref =     "ACM:2006:PCC",
  pages =        "41--50",
  year =         "2006",
  DOI =          "https://doi.org/10.1145/1128022.1128030",
  bibdate =      "Tue Jun 20 06:42:45 2006",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "This paper presents our experience mapping OpenMP
                 parallel programming model to the IBM Cyclops-64 (C64)
                 architecture. The C64 employs a many-core-on-a-chip
                 design that integrates processing logic (160 thread
                 units), embedded memory (5MB) and communication
                 hardware on the same die. Such a unique architecture
                 presents new opportunities for optimization.
                 Specifically, we consider the following three areas:
                 (1) a memory aware runtime library that places
                 frequently used data structures in scratchpad memory;
                 (2) a unique spin lock algorithm for shared memory
                 synchronization based on in-memory atomic instructions
                 and native support for thread level execution; (3) a
                 fast barrier that directly uses C64 hardware support
                 for collective synchronization. All three optimizations
                 together, result in an 80\% overhead reduction for
                 language constructs in OpenMP. We believe that such a
                 drastic reduction in the cost of managing parallelism
                 makes OpenMP more amenable for writing parallel
                 programs on the C64 platform.",
  acknowledgement = ack-nhfb,
}
@Article{Deng:2006:PIK,
  author =       "Junjun Deng and Hengyong Yu and Jun Ni and Tao He and
                 Shiying Zhao and Lihe Wang and Ge Wang",
  title =        "A Parallel Implementation of the Katsevich Algorithm
                 for {$3$-D CT} Image Reconstruction",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "38",
  number =       "1",
  pages =        "35--47",
  month =        oct,
  year =         "2006",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-006-6675-0",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Wed Jul 9 17:32:29 MDT 2008",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=38&issue=1;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=38&issue=1&spage=35",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
  keywords =     "Computed tomography (CT); high performance computing;
                 image reconstruction; Katsevich algorithm; medical
                 imaging; MPI; parallel computing; spiral cone-beam CT",
}
@Article{Donev:2006:ICF,
  author =       "Aleksander Donev",
  title =        "Interoperability with {C} in {Fortran 2003}",
  journal =      j-FORTRAN-FORUM,
  volume =       "25",
  number =       "1",
  pages =        "8--12",
  month =        apr,
  year =         "2006",
  DOI =          "https://doi.org/10.1145/1124708.1124710",
  ISSN =         "1061-7264 (print), 1931-1311 (electronic)",
  ISSN-L =       "1061-7264",
  bibdate =      "Wed Apr 12 07:18:43 MDT 2006",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "One of the major new features in the Fortran 2003 is
                 features for interoperability with C Interop. The
                 intrinsic module ISO\_C\_BINDING provides: * constants,
                 mostly type parameters, C\_NULL\_CHAR, C\_NULL\_PTR,
                 and others, * types, and in particular, TYPE(C\_PTR)
                 and TYPE(C\_FUNPTR), * procedures, such as C\_LOC,
                 C\_FUNLOC, C\_F\_POINTER, C\_F\_PROCPOINTER and
                 C\_ASSOCIATED. A Fortran interface can be specified for
                 a C function with external linkage and used to invoke
                 such a function. The interface has the characteristic
                 BIND(C) label, and must also satisfy some additional
                 restrictions. C Interop can be used to portably use
                 multi-language codes in Fortran. Since most languages
                 interoperate with C, the feature can actually be used
                 to interoperate with other programming languages as
                 well. C Interop can also be used to give access to
                 Fortran programmers to the many standard libraries with
                 widely-used and implemented C interfaces. This includes
                 lower-level tasks such as interfacing with the OS on
                 UNIX-based systems, or using special libraries like
                 OpenGL. For simple API's, developing Fortran interfaces
                 is practically trivial once one gets some experience.
                 For more complicated API's whose full
                 functionality/power is not needed, such as for example
                 TCP/IP sockets or shared-memory segments on UNIX
                 systems, it is often easier to develop a condensed C
                 API/library that does the actual work, and is simpler
                 to interface to from Fortran. However, for libraries
                 like OpenGL, one should provide a full Fortran
                 interface so that the whole API can be accessed. Doing
                 this manually is not easy and is also error-prone due
                 to the size of the OpenGL/GLU/GLUT interfaces. For
                 certain libraries like MPI, a special Fortran interface
                 may be defined for the purposes of efficiency,
                 portability, ease-of-use, or to accommodate for
                 language semantic differences. In this first paper, we
                 will show how to develop a Fortran interface for a
                 simple C API/library. In a second paper, we consider
                 automating the process so that large and more complex
                 API's, and in particular, OpenGL, can be handled. The
                 source codes can be obtained at
                 \path|http://atom.princeton.edu/donev/F2x|. Along the
                 way, we identify some problems with the design of C
                 Interop in Fortran 2003.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Fortran Forum",
  xxCODEN =      "????",
}
@Article{Drosinos:2006:EPT,
  author =       "Nikolaos Drosinos and Nectarios Koziris",
  title =        "The Effect of Process Topology and Load Balancing on
                 Parallel Programming Models for {SMP} Clusters and
                 Iterative Algorithms",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "35",
  number =       "1",
  pages =        "65--91",
  month =        jan,
  year =         "2006",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-006-1156-z",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Wed Jul 9 17:32:27 MDT 2008",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=35&issue=1;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=35&issue=1&spage=65",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
  keywords =     "high performance computing; hybrid programming;
                 iterative algorithms; MPI; OpenMP; parallel
                 programming; SMP clusters; tiling",
}
@Article{Huang:2006:ECS,
  author =       "Jih-Woei Huang and Chih-Ping Chu",
  title =        "An Efficient Communication Scheduling Method for the
                 Processor Mapping Technique Applied Data
                 Redistribution",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "37",
  number =       "3",
  pages =        "297--318",
  month =        sep,
  year =         "2006",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-006-6615-z",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Wed Jul 9 17:32:29 MDT 2008",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=37&issue=3;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=37&issue=3&spage=297",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
  keywords =     "communication scheduling; data redistribution;
                 data-parallel programming; MPI; parallel compiler;
                 processor mapping",
}
@Article{Krawezik:2006:PCM,
  author =       "G{\'e}raud Krawezik and Franck Cappello",
  title =        "Performance comparison of {MPI} and {OpenMP} on shared
                 memory multiprocessors",
  journal =      j-CCPE,
  volume =       "18",
  number =       "1",
  pages =        "29--61",
  month =        jan,
  year =         "2006",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.905",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Dec 5 10:08:00 MST 2011",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626;
                 http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Prac\-tice and
                 Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "11 Oct 2005",
}
@Article{Lastovetsky:2006:HTM,
  author =       "Alexey Lastovetsky and Ravi Reddy",
  title =        "{HeteroMPI}: Towards a message-passing library for
                 heterogeneous networks of computers",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "66",
  number =       "2",
  pages =        "197--220",
  month =        feb,
  year =         "2006",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Jul 11 20:32:34 MDT 2008",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Le:2006:DMC,
  author =       "Thuy T. Le and Jalel Rejeb",
  title =        "A detailed {MPI} communication model for distributed
                 systems",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "22",
  number =       "3",
  pages =        "269--278",
  month =        feb,
  year =         "2006",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Sat Sep 11 13:08:05 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/0167739X",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Lee:2006:PT,
  author =       "Edward A. Lee",
  title =        "The Problem with Threads",
  journal =      j-COMPUTER,
  volume =       "39",
  number =       "5",
  pages =        "33--42",
  month =        may,
  year =         "2006",
  CODEN =        "CPTRB4",
  DOI =          "https://doi.org/10.1109/MC.2006.180",
  ISSN =         "0018-9162 (print), 1558-0814 (electronic)",
  ISSN-L =       "0018-9162",
  bibdate =      "Fri Jul 4 17:16:20 MDT 2008",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "For concurrent programming to become mainstream, we
                 must discard threads as a programming model.
                 Nondeterminism should be judiciously and carefully
                 introduced where needed, and it should be explicit in
                 programs. In general-purpose software engineering
                 practice, we have reached a point where one approach to
                 concurrent programming dominates all others---namely,
                 threads, sequential processes that share memory. They
                 represent a key concurrency model supported by modern
                 computers, programming languages, and operating
                 systems. In scientific computing, where performance
                 requirements have long demanded concurrent programming,
                 data-parallel language extensions and message-passing
                 libraries such as PVM, MPI, and OpenMP dominate over
                 threads for concurrent programming. Computer
                 architectures intended for scientific computing often
                 differ significantly from so-called general-purpose
                 architectures.",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2",
}
@Article{Liao:2006:SDI,
  author =       "Wei-keng Liao and Kenin Coloma and Alok Choudhary and
                 Lee Ward and Eric Russell and Neil Pundit",
  title =        "Scalable Design and Implementations for {MPI} Parallel
                 Overlapping {I/O}",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "17",
  number =       "11",
  pages =        "1264--1276",
  month =        nov,
  year =         "2006",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2006.163",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Jul 3 14:26:50 MDT 2008",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Lopez:2006:ESM,
  author =       "F. C. Garc{\'\i}a L{\'o}pez and N. L. Fr{\'\i}as
                 Arrocha",
  title =        "An efficient synchronization model for {OpenMP}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "66",
  number =       "11",
  pages =        "1359--1365",
  month =        nov,
  year =         "2006",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Jul 11 20:32:35 MDT 2008",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Marowka:2006:BRP,
  author =       "Ami Marowka",
  title =        "Book Review: {\booktitle{Parallel Scientific
                 Computation: A Structured Approach using BSP and
                 MPI}}",
  journal =      j-SCPE,
  volume =       "7",
  number =       "2",
  pages =        "107--108",
  month =        jun,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1895-1767",
  bibdate =      "Thu Sep 2 11:55:11 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.scpe.org/content/7/2.toc",
  URL =          "http://www.scpe.org/vols/vol07/no2/vol07no2bookreview.html",
  acknowledgement = ack-nhfb,
}
@Article{Mehta:2006:MSG,
  author =       "Paras Mehta and Jos{\'e} Nelson Amaral and Duane
                 Szafron",
  title =        "Is {MPI} suitable for a generative design-pattern
                 system?",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "32",
  number =       "7--8",
  pages =        "616--626",
  month =        sep,
  year =         "2006",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Thu Sep 2 17:51:05 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Norden:2006:OVM,
  author =       "Markus Nord{\'e}n and Sverker Holmgren and Michael
                 Thun{\'e}",
  title =        "{OpenMP} versus {MPI} for {PDE} solvers based on
                 regular sparse numerical operators",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "22",
  number =       "1--2",
  pages =        "194--203",
  month =        jan,
  year =         "2006",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Sat Sep 11 13:08:05 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/0167739X",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{ODowd:2006:WGM,
  author =       "Padraig J. O'Dowd and Adarsh Patil and John P.
                 Morrison",
  title =        "{WebCom-G} and {MPICH-G2} Jobs",
  journal =      j-SCPE,
  volume =       "7",
  number =       "3",
  pages =        "75--86",
  month =        sep,
  year =         "2006",
  CODEN =        "????",
  ISSN =         "1895-1767",
  bibdate =      "Thu Sep 2 11:55:11 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.scpe.org/content/7/3.toc",
  URL =          "http://www.scpe.org/vols/vol07/no3/SCPE_7_3_07.pdf;
                 http://www.scpe.org/vols/vol07/no3/SCPE_7_3_07.zip",
  acknowledgement = ack-nhfb,
}
@Article{Ohara:2006:MMP,
  author =       "M. Ohara and H. Inoue and Y. Sohda and H. Komatsu and
                 T. Nakatani",
  title =        "{MPI} microtask for programming the {Cell Broadband
                 Engine{\TM}} processor",
  journal =      j-IBM-SYS-J,
  volume =       "45",
  number =       "1",
  pages =        "85--102",
  month =        "????",
  year =         "2006",
  CODEN =        "IBMSA7",
  ISSN =         "0018-8670",
  bibdate =      "Mon Feb 12 18:19:14 MST 2007",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.research.ibm.com/journal/",
  URL =          "http://www.research.ibm.com/journal/sj/451/ohara.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IBM Systems Journal",
  ordernumber =  "????",
}
@InProceedings{Paul:2006:TLF,
  author =       "Jerome L. Paul and Michal Kouril and Kenneth A.
                 Berman",
  title =        "A template library to facilitate teaching message
                 passing parallel computing",
  crossref =     "ACM:2006:PST",
  pages =        "464--468",
  year =         "2006",
  DOI =          "https://doi.org/10.1145/1121341.1121487",
  bibdate =      "Tue Jun 20 06:51:37 2006",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "This paper discusses a template-based approach to aid
                 in introducing the upper-division undergraduate (or
                 first year graduate) to the rapidly emerging message
                 passing parallel computing paradigm. Our template
                 library facilitates an accelerated MPI programming
                 learning environment that can realistically be included
                 as one topic among many in an algorithms course. One
                 template module is based on a backtracking solution to
                 the satisfiability problem (SAT), which the student
                 first solves in the sequential setting. With the aid of
                 a modified template, the student then develops a simple
                 parallel SAT solver. The template includes such things
                 as I/O functions, allowing the student to focus on the
                 algorithm itself. The parallel part is partially
                 provided by the template, with indicators given in
                 places where the student needs to plug in missing MPI
                 function calls. The students are excited about this
                 hands-on-experience in the increasingly important world
                 of message passing parallel computing, which might be
                 missed if their curriculum does not include a course
                 devoted to this topic.",
  acknowledgement = ack-nhfb,
}
@Article{Rozman:2006:CPL,
  author =       "Igor Rozman and Marjan {\v{S}}terk and Roman Trobec",
  title =        "Communication Performance of {LAM\slash MPI} and
                 {MPICH} on a {Linux} Cluster",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "16",
  number =       "3",
  pages =        "323--334",
  month =        sep,
  year =         "2006",
  CODEN =        "PPLTEE",
  DOI =          "https://doi.org/10.1142/S0129626406002678",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L =       "0129-6264",
  bibdate =      "Thu Sep 2 09:08:11 MDT 2010",
  bibsource =    "http://ejournals.wspc.com.sg/ppl/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
}
@Article{Su:2006:APP,
  author =       "Hai-Jun Su and J. Michael McCarthy and Masha Sosonkina
                 and Layne T. Watson",
  title =        "{Algorithm 857}: {POLSYS\_GLP}---a parallel general
                 linear product homotopy code for solving polynomial
                 systems of equations",
  journal =      j-TOMS,
  volume =       "32",
  number =       "4",
  pages =        "561--579",
  month =        dec,
  year =         "2006",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/1186785.1186789",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Sat Apr 14 09:48:57 MDT 2007",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Globally convergent, probability-one homotopy methods
                 have proven to be very effective for finding all the
                 isolated solutions to polynomial systems of equations.
                 After many years of development, homotopy path trackers
                 based on probability-one homotopy methods are reliable
                 and fast. Now, theoretical advances reducing the number
                 of homotopy paths that must be tracked and handling
                 singular solutions have made probability-one homotopy
                 methods even more practical. POLSYS\_GLP consists of
                 Fortran 95 modules for finding all isolated solutions
                 of a complex coefficient polynomial system of
                 equations. The package is intended to be used on a
                 distributed memory multiprocessor in conjunction with
                 HOMPACK90 (Algorithm 777), and makes extensive use of
                 Fortran 95-derived data types and MPI to support a
                 general linear product (GLP) polynomial system
                 structure. GLP structure is intermediate between the
                 partitioned linear product structure used by
                 POLSYS\_PLP (Algorithm 801) and the BKK-based structure
                 used by PHCPACK. The code requires a GLP structure as
                 input, and although finding the optimal GLP structure
                 is a difficult combinatorial problem, generally
                 physical or engineering intuition about a problem
                 yields a very good GLP structure. POLSYS\_GLP employs a
                 sophisticated power series end game for handling
                 singular solutions, and provides support for problem
                 definition both at a high level and via hand-crafted
                 code. Different GLP structures and their corresponding
                 Bezout numbers can be systematically explored before
                 committing to root finding.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Transactions on Mathematical Software",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Weatherly:2006:DMS,
  author =       "D. Brent Weatherly and David K. Lowenthal and Mario
                 Nakazawa and Franklin Lowenthal",
  title =        "{Dyn-MPI}: Supporting {MPI} on medium-scale,
                 non-dedicated clusters",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "66",
  number =       "6",
  pages =        "822--838",
  month =        jun,
  year =         "2006",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Jul 11 22:04:47 MDT 2008",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Zarrelli:2006:EPE,
  author =       "Roberto Zarrelli and Mario Petrone and Angelo
                 Iannaccio",
  title =        "Enabling {PVM} to exploit the {SCTP} protocol",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "66",
  number =       "11",
  pages =        "1472--1479",
  month =        nov,
  year =         "2006",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Jul 11 20:32:35 MDT 2008",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Zheng:2006:PEA,
  author =       "Gengbin Zheng and Chao Huang and Laxmikant V.
                 Kal{\'e}",
  title =        "Performance evaluation of automatic checkpoint-based
                 fault tolerance for {AMPI} and {Charm++}",
  journal =      j-OPER-SYS-REV,
  volume =       "40",
  number =       "2",
  pages =        "90--99",
  month =        apr,
  year =         "2006",
  CODEN =        "OSRED8",
  ISSN =         "0163-5980 (print), 1943-586X (electronic)",
  ISSN-L =       "0163-5980",
  bibdate =      "Sat Aug 26 08:55:43 MDT 2006",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Operating Systems Review",
}
@Article{Adhianto:2007:PMC,
  author =       "Laksono Adhianto and Barbara Chapman",
  title =        "Performance modeling of communication and computation
                 in hybrid {MPI} and {OpenMP} applications",
  journal =      j-SIM-MODEL-PRACT-THEORY,
  volume =       "15",
  number =       "4",
  pages =        "481--491",
  month =        apr,
  year =         "2007",
  CODEN =        "SMPTCA",
  DOI =          "https://doi.org/10.1016/j.simpat.2006.11.014",
  ISSN =         "1569-190X (print), 1878-1462 (electronic)",
  ISSN-L =       "1569-190X",
  bibdate =      "Mon Oct 07 09:21:03 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.sciencedirect.com/science/article/pii/S1569190X06001109",
  acknowledgement = ack-nhfb,
  fjournal =     "Simulation Modelling Practice and Theory",
}
@Article{anMey:2007:NPO,
  author =       "Dieter an Mey and Samuel Sarholz and Christian
                 Terboven",
  title =        "Nested Parallelization with {OpenMP}",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "35",
  number =       "5",
  pages =        "459--476",
  month =        oct,
  year =         "2007",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-007-0054-1",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Jul 9 16:06:48 MDT 2008",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=5;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=5&spage=459",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
  keywords =     "ccNUMA; Nested parallelization; OpenMP; Shared memory
                 parallelization",
}
@Article{Ayguade:2007:SIO,
  author =       "Eduard Ayguad{\'e} and Matthias S. Mueller",
  title =        "Special Issue on {OpenMP} --- {Guest Editors}'
                 Introduction",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "35",
  number =       "4",
  pages =        "331--333",
  month =        aug,
  year =         "2007",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-007-0048-z",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Jul 9 16:06:44 MDT 2008",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=4;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=4&spage=331",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Becciani:2007:FMH,
  author =       "U. Becciani and V. Antonuccio-Delogu and M.
                 Comparato",
  title =        "{FLY}: {MPI}-2 high resolution code for {LSS}
                 cosmological simulations",
  journal =      j-COMP-PHYS-COMM,
  volume =       "176",
  number =       "3",
  pages =        "211--217",
  day =          "1",
  month =        feb,
  year =         "2007",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2006.10.001",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Mon Feb 13 23:42:13 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465506003687",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Bronevetsky:2007:CFS,
  author =       "Greg Bronevetsky and Bronis R. de Supinski",
  title =        "Complete Formal Specification of the {OpenMP} Memory
                 Model",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "35",
  number =       "4",
  pages =        "335--392",
  month =        aug,
  year =         "2007",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-007-0051-4",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Jul 9 16:06:44 MDT 2008",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=4;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=4&spage=335",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
  keywords =     "formal systems; OpenMP; parallel programming; theorem
                 proving",
}
@Article{Brown:2007:HSP,
  author =       "Russell Brown and Ilya Sharapov",
  title =        "High-Scalability Parallelization of a Molecular
                 Modeling Application: Performance and Productivity
                 Comparison Between {OpenMP} and {MPI} Implementations",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "35",
  number =       "5",
  pages =        "441--458",
  month =        oct,
  year =         "2007",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-007-0057-y",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Jul 9 16:06:48 MDT 2008",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=5;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=5&spage=441",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
  keywords =     "Molecular modeling; MPI; OpenMP; Parallel
                 programming",
}
@Article{Buntinas:2007:IES,
  author =       "Darius Buntinas and Guillaume Mercier and William
                 Gropp",
  title =        "Implementation and evaluation of shared-memory
                 communication and synchronization operations in
                 {MPICH2} using the {Nemesis} communication subsystem",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "33",
  number =       "9",
  pages =        "634--644",
  month =        sep,
  year =         "2007",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Thu Sep 2 17:51:07 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Carbajal:2007:PTD,
  author =       "Santiago Garcia Carbajal",
  title =        "Parallelizing Three Dimensional Cellular Automata with
                 {OpenMP}",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "17",
  number =       "4",
  pages =        "349--361",
  month =        dec,
  year =         "2007",
  CODEN =        "PPLTEE",
  DOI =          "https://doi.org/10.1142/S0129626407003083",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L =       "0129-6264",
  bibdate =      "Thu Sep 2 09:08:11 MDT 2010",
  bibsource =    "http://ejournals.wspc.com.sg/ppl/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
}
@Article{Chandra:2007:ESP,
  author =       "Sumir Chandra and Xiaolin Li and Taher Saif and Manish
                 Parashar",
  title =        "Enabling scalable parallel implementations of
                 structured adaptive mesh refinement applications",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "39",
  number =       "2",
  pages =        "177--203",
  month =        feb,
  year =         "2007",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-007-0110-z",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Wed Jul 9 17:32:30 MDT 2008",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=39&issue=2;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=39&issue=2&spage=177",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
  keywords =     "3-D Richtmyer-Meshkov application; Bin-packing based
                 load-balancing; Hierarchical partitioning; MPI
                 non-blocking communication optimization; SAMR
                 scalability; Structured adaptive mesh refinement",
}
@Article{Chau:2007:MIP,
  author          = {Ming Chau and Didier {El Baz} and Ronan Guivarch and
                     Pierre Spiteri},
  title           = {{MPI} implementation of parallel subdomain methods for
                     linear and nonlinear convection--diffusion problems},
  journal         = j-J-PAR-DIST-COMP,
  volume          = {67},
  number          = {5},
  pages           = {581--591},
  month           = may,
  year            = {2007},
  CODEN           = {JPDCER},
  ISSN            = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L          = {0743-7315},
  bibdate         = {Fri Jul 11 20:32:35 MDT 2008},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.sciencedirect.com/science/journal/07437315},
  acknowledgement = ack-nhfb,
  fjournal        = {Journal of Parallel and Distributed Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Desai:2007:CEM,
author = "Narayan Desai and Ewing Lusk and Rick Bradshaw",
title = "A Composition Environment for {MPI} Programs",
journal = j-IJHPCA,
volume = "21",
number = "2",
pages = "166--173",
month = may,
year = "2007",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342007077858",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/21/2.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/21/2/166.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "The International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
%%% Month and CODEN agree with the other IJHPCA volume 21, number 2
%%% entries in this file; the date range in the title uses an en-dash.
@Article{DiMartino:2007:SIS,
author = "Beniamino {Di Martino} and Dieter Kranzlm{\"u}ller and
Jack Dongarra",
title = "Special issue on selected papers from the
{EuroPVM\slash MPI 2005 Conference, Sorrento, Italy,
18--21 September 2005} --- Preface",
journal = j-IJHPCA,
volume = "21",
number = "2",
pages = "129--131",
month = may,
year = "2007",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342006077863",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Jun 4 08:20:03 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "The International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Duran:2007:PEH,
  author          = {Alejandro Duran and Roger Ferrer and Juan Jos{\'e}
                     Costa and Marc Gonz{\`a}lez and Xavier Martorell and
                     Eduard Ayguad{\'e} and Jes{\'u}s Labarta},
  title           = {A Proposal for Error Handling in {OpenMP}},
  journal         = j-INT-J-PARALLEL-PROG,
  volume          = {35},
  number          = {4},
  pages           = {393--416},
  month           = aug,
  year            = {2007},
  CODEN           = {IJPPE5},
  DOI             = {https://doi.org/10.1007/s10766-007-0049-y},
  ISSN            = {0885-7458 (print), 1573-7640 (electronic)},
  ISSN-L          = {0885-7458},
  bibdate         = {Wed Jul 9 16:06:44 MDT 2008},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=4;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=4&spage=393},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of Parallel Programming},
  journal-URL     = {http://link.springer.com/journal/10766},
  keywords        = {error handling; fault tolerance; OpenMP; parallel
                     languages; parallel programming},
}
@Article{Falzone:2007:PMF,
author = "Christopher Falzone and Anthony Chan and Ewing Lusk
and William Gropp",
title = "A Portable Method for Finding User Errors in the Usage
of {MPI} Collective Operations",
journal = j-IJHPCA,
volume = "21",
number = "2",
pages = "155--165",
month = may,
year = "2007",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342007077860",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/21/2.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/21/2/155.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "The International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Genaud:2007:PMP,
author = "St{\'e}phane Genaud and Choopan Rattanapoka",
title = "{P2P--MPI}: a Peer-to-Peer Framework for Robust
Execution of Message Passing Parallel Programs on
{Grids}",
journal = j-J-GRID-COMP,
volume = "5",
number = "1",
pages = "27--42",
month = mar,
year = "2007",
CODEN = "????",
DOI = "https://doi.org/10.1007/s10723-006-9056-2",
ISSN = "1570-7873 (print), 1572-9184 (electronic)",
ISSN-L = "1570-7873",
bibdate = "Wed Jul 9 17:01:30 MDT 2008",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=1570-7873&volume=5&issue=1;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=1570-7873&volume=5&issue=1&spage=27",
acknowledgement = ack-nhfb,
fjournal = "Journal of Grid Computing",
journal-URL = "http://link.springer.com/journal/10723",
keywords = "Grid; Java; middleware; MPI; peer-to-peer",
}
@Article{Giannoutakis:2007:MHP,
  author          = {K. M. Giannoutakis and G. A. Gravvanis and B. Clayton
                     and A. Patil and T. Enright and J. P. Morrison},
  title           = {Matching high performance approximate inverse
                     preconditioning to architectural platforms},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {42},
  number          = {2},
  pages           = {145--163},
  month           = nov,
  year            = {2007},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1007/s11227-007-0129-1},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Wed Jul 9 17:32:32 MDT 2008},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=42&issue=2;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=42&issue=2&spage=145},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
  keywords        = {Approximate inverses; Globus toolkit; MPI; Open MPI;
                     Parallel iterative methods; Parallel/distributed
                     computations; Preconditioned conjugate gradient
                     method},
}
@Article{Graham:2007:OMH,
  author          = {Richard L. Graham and Brian W. Barrett and Galen M.
                     Shipman and Timothy S. Woodall and George Bosilca},
  title           = {{Open MPI}: a High Performance, Flexible
                     Implementation of {MPI} Point-to-Point Communications},
  journal         = j-PARALLEL-PROCESS-LETT,
  volume          = {17},
  number          = {1},
  pages           = {79--88},
  month           = mar,
  year            = {2007},
  CODEN           = {PPLTEE},
  DOI             = {https://doi.org/10.1142/S0129626407002880},
  ISSN            = {0129-6264 (print), 1793-642X (electronic)},
  bibdate         = {Thu Sep 2 09:08:11 MDT 2010},
  bibsource       = {http://ejournals.wspc.com.sg/ppl/;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Processing Letters},
  journal-URL     = {http://www.worldscientific.com/loi/ppl},
}
@Article{Gropp:2007:TSM,
  author          = {William Gropp and Rajeev Thakur},
  title           = {Thread-safety in an {MPI} implementation: Requirements
                     and analysis},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {33},
  number          = {9},
  pages           = {595--604},
  month           = sep,
  year            = {2007},
  CODEN           = {PACOEJ},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Thu Sep 2 17:51:07 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
%%% The URL is kept on a single line with no embedded blanks, since
%%% whitespace inside it would break the link.  The second author's
%%% surname is braced so that BibTeX does not take the bare initial
%%% as the family name (NOTE(review): confirm the full spelling).
@InProceedings{Gu:2007:IPC,
author = "Feng Long Gu and Hyacinthe {Nzigou M.} and Guilherme de
Melo Baptista Domingues and Takeshi Nanri and Kazuaki
Murakami",
title = "Investigating the Performance of Collective
Communications on {SMP} Clusters: a Case for {{\tt
MPI\_Allgather}}",
crossref = "Simos:2007:CMS",
volume = "2A",
pages = "52--56",
year = "2007",
bibdate = "Thu Feb 21 14:34:40 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://proceedings.aip.org/getpdf/servlet/GetPDFServlet?filetype=pdf&id=APCPCS000963000002000052000001&idtype=cvips",
acknowledgement = ack-nhfb,
}
@Article{Klemm:2007:JIO,
  author          = {Michael Klemm and Matthias Bezold and Ronald Veldema
                     and Michael Philippsen},
  title           = {{JaMP}: an implementation of {OpenMP} for a {Java
                     DSM}},
  journal         = j-CCPE,
  volume          = {19},
  number          = {18},
  pages           = {2333--2352},
  day             = {25},
  month           = dec,
  year            = {2007},
  CODEN           = {CCPEBO},
  DOI             = {https://doi.org/10.1002/cpe.1178},
  ISSN            = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L          = {1532-0626},
  bibdate         = {Mon Dec 5 10:08:18 MST 2011},
  bibsource       = {http://www.interscience.wiley.com/jpages/1532-0626;
                     http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Prac\-tice and
                     Experience},
  journal-URL     = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate      = {3 Apr 2007},
}
@Article{Kurzyniec:2007:UCA,
  author          = {Dawid Kurzyniec and Magdalena Slawi{\'n}ska and
                     Jaroslaw Slawi{\'n}ski and Vaidy Sunderam},
  title           = {{Unibus}: a contrarian approach to {Grid} computing},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {42},
  number          = {1},
  pages           = {125--144},
  month           = oct,
  year            = {2007},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1007/s11227-006-0033-0},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Wed Jul 9 17:32:32 MDT 2008},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=42&issue=1;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=42&issue=1&spage=125},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
  keywords        = {Aggregation; Grids; MPI; Resource sharing;
                     Virtualization},
}
@Article{Latham:2007:IMI,
author = "Robert Latham and Robert Ross and Rajeev Thakur",
title = "Implementing {MPI-IO} Atomic Mode and Shared File
Pointers Using {MPI} One-Sided Communication",
journal = j-IJHPCA,
volume = "21",
number = "2",
pages = "132--143",
month = may,
year = "2007",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342007077859",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/21/2.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/21/2/132.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "The International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Li:2007:DIV,
  author          = {Kuan-Ching Li and Hsun-Chang Chang},
  title           = {The design and implementation of visual performance
                     monitoring and analysis toolkit for cluster and {Grid}
                     environments},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {40},
  number          = {3},
  pages           = {299--317},
  month           = jun,
  year            = {2007},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1007/s11227-006-0020-5},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Wed Jul 9 17:32:31 MDT 2008},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=40&issue=3;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=40&issue=3&spage=299},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
  keywords        = {Distributed computing; Monitoring; MPI parallel
                     program; Performance visualization},
}
@Article{Liao:2007:CCS,
author = "Wei-keng Liao and Kenin Coloma and Alok Choudhary and
Lee Ward",
title = "Cooperative Client-Side File Caching for {MPI}
Applications",
journal = j-IJHPCA,
volume = "21",
number = "2",
pages = "144--154",
month = may,
year = "2007",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342007077857",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/21/2.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/21/2/144.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "The International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Liao:2007:OOP,
  author          = {Chunhua Liao and Oscar Hernandez and Barbara Chapman
                     and Wenguang Chen and Weimin Zheng},
  title           = {{OpenUH}: an optimizing, portable {OpenMP} compiler},
  journal         = j-CCPE,
  volume          = {19},
  number          = {18},
  pages           = {2317--2332},
  day             = {25},
  month           = dec,
  year            = {2007},
  CODEN           = {CCPEBO},
  DOI             = {https://doi.org/10.1002/cpe.1174},
  ISSN            = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L          = {1532-0626},
  bibdate         = {Mon Dec 5 10:08:18 MST 2011},
  bibsource       = {http://www.interscience.wiley.com/jpages/1532-0626;
                     http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Prac\-tice and
                     Experience},
  journal-URL     = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate      = {3 Apr 2007},
}
@Article{Marathe:2007:SCC,
  author          = {Jaydeep Marathe and Frank Mueller},
  title           = {Source-Code-Correlated Cache Coherence
                     Characterization of {OpenMP} Benchmarks},
  journal         = j-IEEE-TRANS-PAR-DIST-SYS,
  volume          = {18},
  number          = {6},
  pages           = {818--834},
  month           = jun,
  year            = {2007},
  CODEN           = {ITDSEO},
  DOI             = {https://doi.org/10.1109/TPDS.2007.1058},
  ISSN            = {1045-9219 (print), 1558-2183 (electronic)},
  ISSN-L          = {1045-9219},
  bibdate         = {Thu Jul 3 14:26:52 MDT 2008},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {IEEE Transactions on Parallel and Distributed
                     Systems},
  journal-URL     = {http://www.computer.org/tpds/archives.htm},
}
@Article{Marowka:2007:PCD,
  author          = {Ami Marowka},
  title           = {Parallel computing on any desktop},
  journal         = j-CACM,
  volume          = {50},
  number          = {9},
  pages           = {74--78},
  month           = sep,
  year            = {2007},
  CODEN           = {CACMA2},
  DOI             = {https://doi.org/10.1145/1284621.1284622},
  ISSN            = {0001-0782 (print), 1557-7317 (electronic)},
  ISSN-L          = {0001-0782},
  bibdate         = {Mon Jun 16 18:32:57 MDT 2008},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract        = {Parallelization lets applications exploit the high
                     throughput of new multicore processors, and the OpenMP
                     parallel programming model helps developers create
                     multithreaded applications.},
  acknowledgement = ack-nhfb,
  fjournal        = {Communications of the ACM},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J79},
}
@Article{Mohr:2007:SPE,
  author          = {Bernd Mohr and Jesper Larsson Tr{\"a}ff and Joachim
                     Worringen},
  title           = {Selected papers from {EuroPVM\slash MPI 2006}},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {33},
  number          = {9},
  pages           = {593--594},
  month           = sep,
  year            = {2007},
  CODEN           = {PACOEJ},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Thu Sep 2 17:51:07 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Morris:2007:SNO,
  author          = {Alan Morris and Allen D. Malony and Sameer S. Shende},
  title           = {Supporting Nested {OpenMP} Parallelism in the {TAU}
                     Performance System},
  journal         = j-INT-J-PARALLEL-PROG,
  volume          = {35},
  number          = {4},
  pages           = {417--436},
  month           = aug,
  year            = {2007},
  CODEN           = {IJPPE5},
  DOI             = {https://doi.org/10.1007/s10766-007-0050-5},
  ISSN            = {0885-7458 (print), 1573-7640 (electronic)},
  ISSN-L          = {0885-7458},
  bibdate         = {Wed Jul 9 16:06:44 MDT 2008},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=4;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=4&spage=417},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of Parallel Programming},
  journal-URL     = {http://link.springer.com/journal/10766},
  keywords        = {Nested parallelism; OpenMP; TAU},
}
@Article{Nascimento:2007:DDS,
  author          = {Aline P. Nascimento and Alexandre C. Sena and Cristina
                     Boeres and Vinod E. F. Rebello},
  title           = {Distributed and dynamic self-scheduling of parallel
                     {MPI Grid} applications},
  journal         = j-CCPE,
  volume          = {19},
  number          = {14},
  pages           = {1955--1974},
  day             = {25},
  month           = sep,
  year            = {2007},
  CODEN           = {CCPEBO},
  DOI             = {https://doi.org/10.1002/cpe.1139},
  ISSN            = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L          = {1532-0626},
  bibdate         = {Mon Dec 5 10:08:16 MST 2011},
  bibsource       = {http://www.interscience.wiley.com/jpages/1532-0626;
                     http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Prac\-tice and
                     Experience},
  journal-URL     = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate      = {14 Nov 2006},
}
@Article{Norden:2007:DDM,
  author          = {Markus Nord{\'e}n and Henrik L{\"o}f and Jarmo
                     Rantakokko and Sverker Holmgren},
  title           = {Dynamic Data Migration for Structured {AMR} Solvers},
  journal         = j-INT-J-PARALLEL-PROG,
  volume          = {35},
  number          = {5},
  pages           = {477--491},
  month           = oct,
  year            = {2007},
  CODEN           = {IJPPE5},
  DOI             = {https://doi.org/10.1007/s10766-007-0056-z},
  ISSN            = {0885-7458 (print), 1573-7640 (electronic)},
  ISSN-L          = {0885-7458},
  bibdate         = {Wed Jul 9 16:06:48 MDT 2008},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=5;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=5&spage=477},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of Parallel Programming},
  journal-URL     = {http://link.springer.com/journal/10766},
  keywords        = {Adaptive mesh refinement; cc-NUMA; Geographical
                     locality; Graph partitioning; OpenMP; Page migration;
                     SAMR; Shared memory},
}
@Article{Pandey:2007:SCM,
  author          = {Nirved Pandey and G. K. Sharma},
  title           = {Startup comparison for message passing libraries with
                     {DTM} on {Linux} clusters},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {39},
  number          = {1},
  pages           = {59--72},
  month           = jan,
  year            = {2007},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1007/s11227-006-0004-5},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Wed Jul 9 17:32:30 MDT 2008},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=39&issue=1;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=39&issue=1&spage=59},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
  keywords        = {Chameleon MPICH; Distributed Performance Index (DPI);
                     Distributed Task Machine (DTM); High Performance
                     Cluster (HPC); Message Passing Interface (MPI); MPI--
                     Parallel Virtual Machine (PVM); Relative Distributed
                     Performance Index (RDPI)},
}
@Article{Pedicini:2007:PPE,
  author          = {Marco Pedicini and Francesco Quaglia},
  title           = {{PELCR}: {Parallel} environment for optimal
                     lambda-calculus reduction},
  journal         = j-TOCL,
  volume          = {8},
  number          = {3},
  pages           = {14:1--14:??},
  month           = jul,
  year            = {2007},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/1243996.1243997},
  ISSN            = {1529-3785 (print), 1557-945X (electronic)},
  ISSN-L          = {1529-3785},
  bibdate         = {Mon Jun 16 14:28:15 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocl/;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract        = {In this article we present the implementation of an
                     environment supporting L{\'e}vy's optimal reduction for
                     the $ \lambda $-calculus on parallel (or distributed)
                     computing systems. In a similar approach to Lamping's,
                     we base our work on a graph reduction technique, known
                     as directed virtual reduction, which is actually a
                     restriction of Danos-Regnier virtual reduction.\par
                     The environment, which we refer to as PELCR (parallel
                     environment for optimal lambda-calculus reduction),
                     relies on a strategy for directed virtual reduction,
                     namely half combustion. While developing PELCR we
                     adopted both a message aggregation technique, allowing
                     reduction of the communication overhead, and a fair
                     policy for distributing dynamically originated load
                     among processors.\par
                     We also present an experimental study demonstrating the
                     ability of PELCR to definitely exploit the parallelism
                     intrinsic to $ \lambda $-terms while performing the
                     reduction. We show how PELCR allows achieving up to
                     70--80\% of the ideal speedup on last generation
                     multiprocessor computing systems. As a last note, the
                     software modules have been developed with the C
                     language and using a standard interface for message
                     passing, that is, MPI, thus making PELCR itself a
                     highly portable software package.},
  acknowledgement = ack-nhfb,
  articleno       = {14},
  fjournal        = {ACM Transactions on Computational Logic},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J773},
  keywords        = {functional programming; geometry of interaction;
                     linear logic; optimal reduction; parallel
                     implementation; virtual reduction},
}
@Article{Pjesivac-Grbovic:2007:MCA,
  author          = {Jelena Pje{\v{s}}ivac-Grbovi{\'c} and George Bosilca
                     and Graham E. Fagg and Thara Angskun and Jack J.
                     Dongarra},
  title           = {{MPI} collective algorithm selection and quadtree
                     encoding},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {33},
  number          = {9},
  pages           = {613--623},
  month           = sep,
  year            = {2007},
  CODEN           = {PACOEJ},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Thu Sep 2 17:51:07 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
%%% The journal is Cluster Computing; its subtitle is recorded in the
%%% fjournal value.  The first author is accented as in the companion
%%% entry Pjesivac-Grbovic:2007:MCA, and the city is stored as the
%%% conference location rather than as a classification.
@Article{Pjesivac-Grbovic:2007:PAM,
author = "Jelena Pje{\v{s}}ivac-Grbovi{\'c} and Thara Angskun and
George Bosilca and Graham E. Fagg and Edgar Gabriel and
Jack J. Dongarra",
title = "Performance analysis of {MPI} collective operations",
journal = "Cluster Computing",
volume = "10",
number = "2",
pages = "127--143",
month = "????",
year = "2007",
DOI = "https://doi.org/10.1007/s10586-007-0012-0",
ISSN = "1386-7857 (print), 1573-7543 (electronic)",
ISSN-L = "1386-7857",
bibdate = "Tue Jun 4 08:20:03 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
conference-date = "Apr 03--08, 2005",
conference-location = "Denver, CO",
conference-name = "4th International Workshop on Performance
Modelling, Evaluation, and Optimization of Parallel and
Distributed Systems",
fjournal = "Cluster Computing: The Journal of Networks, Software
Tools and Applications",
}
@Article{Ramadan:2007:TDM,
  author          = {Omar Ramadan},
  title           = {Three dimensional {MPI} parallel implementation of the
                     {PML} algorithm for truncating finite-difference
                     time-domain {Grids}},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {33},
  number          = {2},
  pages           = {109--115},
  month           = mar,
  year            = {2007},
  CODEN           = {PACOEJ},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Thu Sep 2 17:51:06 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
%%% The two-word family name Portegies Zwart is braced so that BibTeX
%%% does not treat Portegies as a given name.
@Article{Rycerz:2007:IBS,
author = "Katarzyna Rycerz and Alfredo Tirado-Ramos and Alessia
Gualandris and Simon F. {Portegies Zwart} and Marian
Bubak and Peter M. A. Sloot",
title = "Interactive {N}-Body Simulations on the {Grid}: {HLA}
Versus {MPI}",
journal = j-IJHPCA,
volume = "21",
number = "2",
pages = "210--221",
month = may,
year = "2007",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342007074874",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/21/2.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/21/2/210.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "The International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Tsujita:2007:RMP,
  author          = {Y. Tsujita},
  title           = {Remote {MPI-I/O} on a Parallel Virtual File System
                     Using a Circular Buffer for High Throughput},
  journal         = j-INT-J-COMPUT-APPL,
  volume          = {29},
  number          = {3},
  pages           = {291--299},
  year            = {2007},
  DOI             = {https://doi.org/10.1080/1206212X.2007.11441859},
  ISSN            = {1206-212X (print), 1925-7074 (electronic)},
  ISSN-L          = {1206-212X},
  bibdate         = {Sat Apr 21 17:24:05 MDT 2018},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/ijca.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {https://www.tandfonline.com/doi/full/10.1080/1206212X.2007.11441859},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of Computer Applications},
  journal-URL     = {https://www.tandfonline.com/loi/tjca20},
  online-date     = {11 Jul 2015},
}
@Article{Wang:2007:EAP,
author = "Perry H. Wang and Jamison D. Collins and Gautham N.
Chinya and Hong Jiang and Xinmin Tian and Milind Girkar
and Nick Y. Yang and Guei-Yuan Lueh and Hong Wang",
title = "{EXOCHI}: architecture and programming environment for
a heterogeneous multi-core multithreaded system",
journal = j-SIGPLAN,
volume = "42",
number = "6",
pages = "156--166",
month = jun,
year = "2007",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1250734.1250753",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Jun 18 10:55:30 MDT 2008",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "Future mainstream microprocessors will likely
integrate specialized accelerators, such as GPUs, onto
a single die to achieve better performance and power
efficiency. However, it remains a keen challenge to
program such a heterogeneous multicore platform, since
these specialized accelerators feature ISAs and
functionality that are significantly different from the
general purpose CPU cores. In this paper, we present
EXOCHI: (1) Exoskeleton Sequencer (EXO), an
architecture to represent heterogeneous accelerators as
ISA-based MIMD architecture resources, and a shared
virtual memory heterogeneous multithreaded program
execution model that tightly couples specialized
accelerator cores with general-purpose CPU cores, and
(2) C for Heterogeneous Integration (CHI), an
integrated C/C++ programming environment that supports
accelerator-specific inline assembly and
domain-specific languages. The CHI compiler extends the
OpenMP pragma for heterogeneous multithreading
programming, and produces a single fat binary with code
sections corresponding to different instruction sets.
The runtime can judiciously spread parallel computation
across the heterogeneous cores to optimize performance
and power.\par
We have prototyped the EXO architecture on a physical
heterogeneous platform consisting of an Intel{\reg}
Core{\TM} 2 Duo processor and an 8-core 32-thread
Intel{\reg} Graphics Media Accelerator X3000. In
addition, we have implemented the CHI integrated
programming environment with the Intel{\reg} C++
Compiler, runtime toolset, and debugger. On the EXO
prototype system, we have enhanced a suite of
production-quality media kernels for video and image
processing to utilize the accelerator through the CHI
programming interface, achieving significant speedup
(1.41X to 10.97X) over execution on the IA32 CPU
alone.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "GPU; heterogeneous multi-cores; OpenMP",
}
@Article{Weng:2007:OIS,
  author          = {Tien-Hsiung Weng and Ruey-Kuen Perng and Barbara
                     Chapman},
  title           = {{OpenMP} Implementation of {SPICE3} Circuit
                     Simulator},
  journal         = j-INT-J-PARALLEL-PROG,
  volume          = {35},
  number          = {5},
  pages           = {493--505},
  month           = oct,
  year            = {2007},
  CODEN           = {IJPPE5},
  DOI             = {https://doi.org/10.1007/s10766-007-0053-2},
  ISSN            = {0885-7458 (print), 1573-7640 (electronic)},
  ISSN-L          = {0885-7458},
  bibdate         = {Wed Jul 9 16:06:48 MDT 2008},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=35&issue=5;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=35&issue=5&spage=493},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of Parallel Programming},
  journal-URL     = {http://link.springer.com/journal/10766},
  keywords        = {OpenMP SPICE circuit simulator; Shared-memory
                     programming model},
}
@Article{Wu:2007:IFR,
  author          = {C.-L. Wu and D.-C. Lou and S.-Y. Chen},
  title           = {Integer factorization for {RSA} cryptosystem under a
                     {PVM} environment},
  journal         = j-INT-J-COMPUT-SYST-SCI-ENG,
  volume          = {22},
  number          = {1--2},
  pages           = {??--??},
  month           = jan # "\slash " # mar,
  year            = {2007},
  CODEN           = {CSSEEI},
  ISSN            = {0267-6192},
  bibdate         = {Tue Dec 03 12:31:25 2013},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/computsystscieng.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of Computer Systems Science and
                     Engineering},
  remark          = {Special issue: Privacy Data Management},
}
@Article{Zhong:2007:PPS,
  author          = {Wei Zhong and Gulsah Altun and Xinmin Tian and Robert
                     Harrison and Phang C. Tai and Yi Pan},
  title           = {Parallel protein secondary structure prediction
                     schemes using {Pthread} and {OpenMP} over
                     hyper-threading technology},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {41},
  number          = {1},
  pages           = {1--16},
  month           = jul,
  year            = {2007},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1007/s11227-007-0100-1},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Wed Jul 9 17:32:31 MDT 2008},
  bibsource       = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=41&issue=1;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=41&issue=1&spage=1},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
  keywords        = {BLOSUM62 matrix; DBNN (Denoeux Belief Neural Network);
                     Hydrophobicity matrix; Hyper-threading; MPI (Message
                     Passing Interface); Neural networks; OpenMP; Parallel
                     architecture; Protein secondary structure prediction;
                     PSSM (Position Specific Scoring Matrix); Pthread;
                     Speedup},
}
@Article{Akzhalova:2008:WPL,
  author          = {Assel Zh. Akzhalova and Daniar Y. Aizhulov and
                     Galymzhan Seralin and Gulnar Balakayeva},
  title           = {{Web} portal for large-scale computations based on
                     {Grid} and {MPI}},
  journal         = j-SCPE,
  volume          = {9},
  number          = {2},
  pages           = {135--142},
  month           = jun,
  year            = {2008},
  CODEN           = {????},
  ISSN            = {1895-1767},
  bibdate         = {Thu Sep 2 11:55:11 MDT 2010},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.scpe.org/content/9/2.toc},
  URL             = {http://www.scpe.org/vols/vol09/no2/SCPE_9_2_06.pdf;
                     http://www.scpe.org/vols/vol09/no2/SCPE_9_2_06.zip},
  acknowledgement = ack-nhfb,
}
@TechReport{Baboulin:2008:SID,
author = "Marc Baboulin and Jack J. Dongarra and Stanimire
Tomov",
title = "Some Issues in Dense Linear Algebra for Multicore and
Special Purpose Architectures",
type = "LAPACK Working Note",
number = "200",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
month = may,
year = "2008",
bibdate = "Fri Apr 24 12:25:43 2009",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn200.pdf",
abstract = "We address some key issues in designing dense linear
algebra (DLA) algorithms that are common for both
multi/many-cores and special purpose architectures (in
particular GPUs). We present them in the context of an
LU factorization algorithm, where randomization
techniques are used as an alternative to pivoting. This
approach yields an algorithm based entirely on a
collection of small Level 3 BLAS type computational
tasks, which has emerged as a common goal in designing
DLA algorithms for new architectures. Other common
trends, also considered here, are block asynchronous
task execution and ``Block'' layouts for the data
associated with the separate tasks. We present
numerical results and other specific experiments with
DLA algorithms on NVIDIA GPUs using CUDA. The GPU
results are also of interest themselves as we show a
performance of up to 160 Gflop/s on a single Quadro FX
5600 card. Keywords: dense linear algebra, parallel
algorithms, LU factorization, multicore processors,
graphic process units.",
acknowledgement = ack-nhfb,
utknumber = "UT-CS-08-615",
}
@Article{Bernabeu:2008:MPA,
  author = {Miguel O. Bernabeu and Pedro Alonso and Antonio M.
    Vidal},
  title = {A multilevel parallel algorithm to solve symmetric
    {Toeplitz} linear systems},
  journal = j-J-SUPERCOMPUTING,
  volume = {44},
  number = {3},
  pages = {237--256},
  month = jun,
  year = {2008},
  CODEN = {JOSUED},
  DOI = {https://doi.org/10.1007/s11227-007-0157-x},
  ISSN = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L = {0920-8542},
  bibdate = {Wed Jul 9 17:32:34 MDT 2008},
  bibsource = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=44&issue=3;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=44&issue=3&spage=237},
  acknowledgement = ack-nhfb,
  fjournal = {The Journal of Supercomputing},
  journal-URL = {http://link.springer.com/journal/11227},
  keywords = {Cauchy-like matrix; MPI; Multilevel parallel
    programming; OpenMP; Rank displacement; Toeplitz
    matrix},
}
@InProceedings{Bischof:2008:PRM,
  author = {Christian Bischof and Niels Guertler and Andreas
    Kowarz},
  title = {Parallel Reverse Mode Automatic Differentiation for
    {OpenMP} Programs with {ADOL-C}},
  crossref = {Bischof:2008:AAD},
  volume = {64},
  pages = {163--173},
  year = {2008},
  DOI = {https://doi.org/10.1007/978-3-540-68942-3_15},
  bibdate = {Sat Dec 22 08:33:39 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncse.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer.com/content/pdf/10.1007/978-3-540-68942-3_15},
  acknowledgement = ack-nhfb,
  book-DOI = {https://doi.org/10.1007/978-3-540-68942-3},
  book-URL = {http://www.springerlink.com/content/978-3-540-68942-3},
}
@Article{Bondhugula:2008:PAP,
  author = {Uday Bondhugula and Albert Hartono and J. Ramanujam
    and P. Sadayappan},
  title = {A practical automatic polyhedral parallelizer and
    locality optimizer},
  journal = j-SIGPLAN,
  volume = {43},
  number = {6},
  pages = {101--113},
  month = jun,
  year = {2008},
  CODEN = {SINODQ},
  DOI = {https://doi.org/10.1145/1379022.1375595},
  ISSN = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  ISSN-L = {0362-1340},
  bibdate = {Wed Jun 18 11:04:53 MDT 2008},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract = {We present the design and implementation of an
    automatic polyhedral source-to-source transformation
    framework that can optimize regular programs (sequences
    of possibly imperfectly nested loops) for parallelism
    and locality simultaneously. Through this work, we show
    the practicality of analytical model-driven automatic
    transformation in the polyhedral model -- far beyond
    what is possible by current production compilers.
    Unlike previous works, our approach is an end-to-end
    fully automatic one driven by an integer linear
    optimization framework that takes an explicit view of
    finding good ways of tiling for parallelism and
    locality using affine transformations. The framework
    has been implemented into a tool to automatically
    generate OpenMP parallel code from C program sections.
    Experimental results from the tool show very high
    speedups for local and parallel execution on
    multi-cores over state-of-the-art compiler frameworks
    from the research community as well as the best native
    production compilers. The system also enables the easy
    use of powerful empirical/iterative optimization for
    general arbitrarily nested loop sequences.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIG{\-}PLAN Notices},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  keywords = {affine transformations; automatic parallelization;
    locality optimization; loop transformations; polyhedral
    model; tiling},
}
@Article{Buntinas:2008:BVN,
  author = {Darius Buntinas and Camille Coti and Thomas Herault
    and Pierre Lemarinier and Laurence Pilard and Ala
    Rezmerita and Eric Rodriguez and Franck Cappello},
  title = {Blocking vs. non-blocking coordinated checkpointing
    for large-scale fault tolerant {MPI} Protocols},
  journal = j-FUT-GEN-COMP-SYS,
  volume = {24},
  number = {1},
  pages = {73--84},
  month = jan,
  year = {2008},
  CODEN = {FGSEVI},
  ISSN = {0167-739X (print), 1872-7115 (electronic)},
  ISSN-L = {0167-739X},
  bibdate = {Sat Sep 11 13:08:10 MDT 2010},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/0167739X},
  acknowledgement = ack-nhfb,
  fjournal = {Future Generation Computer Systems},
  journal-URL = {http://www.sciencedirect.com/science/journal/0167739X},
}
@Book{Chapman:2008:UOP,
author = "Barbara Chapman and Gabriele Jost and Ruud van der
Pas",
title = "Using {OpenMP}: portable shared memory parallel
programming",
publisher = pub-MIT,
address = pub-MIT:adr,
pages = "xxii + 353",
year = "2008",
ISBN = "0-262-03377-1 (hardcover), 0-262-53302-2 (paperback)",
ISBN-13 = "978-0-262-03377-0 (hardcover), 978-0-262-53302-7
(paperback)",
LCCN = "QA76.642 .C49 2008",
bibdate = "Sat Oct 5 07:59:33 MDT 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
z3950.loc.gov:7090/Voyager",
series = "Scientific and engineering computation",
URL = "http://www.loc.gov/catdir/toc/ecip0721/2007026656.html",
abstract = "This title presents a comprehensive overview of
OpenMP, the standard application programming interface
for shared memory parallel computing -- a reference for
students and professionals. OpenMP, a portable
programming interface for shared memory parallel
computers, was adopted as an informal standard in 1997
by computer scientists who wanted a unified model on
which to base programs for shared memory systems.
OpenMP is now used by many software developers; it
offers significant advantages over both hand-threading
and MPI. ``Using OpenMP'' offers a comprehensive
introduction to parallel programming concepts and a
detailed overview of OpenMP. ``Using OpenMP'' discusses
hardware developments, describes where OpenMP is
applicable, and compares OpenMP to other programming
interfaces for shared and distributed memory parallel
architectures. It introduces the individual features of
OpenMP, provides many source code examples that
demonstrate the use and functionality of the language
constructs, and offers tips on writing an efficient
OpenMP program. It describes how to use OpenMP in
full-scale applications to achieve high performance on
large-scale architectures, discussing several case
studies in detail, and offers in-depth troubleshooting
advice.",
acknowledgement = ack-nhfb,
author-dates = "1954--",
subject = "Parallel programming (Computer science); Application
program interfaces (Computer software)",
tableofcontents = "1. Introduction \\
2. Overview of OpenMP \\
3. Writing a First OpenMP Program \\
4. OpenMP Language Features \\
5. How to Get Good Performance by Using OpenMP \\
6. Using OpenMP in the Real World \\
7. Troubleshooting \\
8. Under the Hood: How OpenMP Really Works \\
9. The Future of OpenMP",
}
@Article{Che:2008:PSG,
  author = {Shuai Che and Michael Boyer and Jiayuan Meng and David
    Tarjan and Jeremy W. Sheaffer and Kevin Skadron},
  title = {A performance study of general-purpose applications on
    graphics processors using {CUDA}},
  journal = j-J-PAR-DIST-COMP,
  volume = {68},
  number = {10},
  pages = {1370--1380},
  month = oct,
  year = {2008},
  CODEN = {JPDCER},
  ISSN = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L = {0743-7315},
  bibdate = {Wed Sep 1 16:27:23 MDT 2010},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/07437315},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Dalcin:2008:MPP,
  author = {Lisandro Dalc{\'\i}n and Rodrigo Paz and Mario Storti
    and Jorge D'El{\'\i}a},
  title = {{MPI} for {Python}: Performance improvements and
    {MPI-2} extensions},
  journal = j-J-PAR-DIST-COMP,
  volume = {68},
  number = {5},
  pages = {655--662},
  month = may,
  year = {2008},
  CODEN = {JPDCER},
  ISSN = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L = {0743-7315},
  bibdate = {Fri Jul 11 20:32:36 MDT 2008},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/07437315},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{DiMartino:2008:SSG,
  author = {Beniamino {Di Martino} and Dieter Kranzlm{\"u}ller and
    Jack Dongarra},
  title = {Special section: {Grid} computing and the {Message
    Passing Interface}},
  journal = j-FUT-GEN-COMP-SYS,
  volume = {24},
  number = {2},
  pages = {119--120},
  month = feb,
  year = {2008},
  CODEN = {FGSEVI},
  ISSN = {0167-739X (print), 1872-7115 (electronic)},
  ISSN-L = {0167-739X},
  bibdate = {Sat Sep 11 13:08:11 MDT 2010},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/0167739X},
  acknowledgement = ack-nhfb,
  fjournal = {Future Generation Computer Systems},
  journal-URL = {http://www.sciencedirect.com/science/journal/0167739X},
}
@Article{Faraj:2008:SPA,
  author = {Ahmad Faraj and Pitch Patarasuk and Xin Yuan},
  title = {A Study of Process Arrival Patterns for {MPI}
    Collective Operations},
  journal = j-INT-J-PARALLEL-PROG,
  volume = {36},
  number = {6},
  pages = {543--570},
  month = dec,
  year = {2008},
  CODEN = {IJPPE5},
  ISSN = {0885-7458 (print), 1573-7640 (electronic)},
  ISSN-L = {0885-7458},
  bibdate = {Wed Sep 1 16:06:46 MDT 2010},
  bibsource = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=36&issue=6;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=36&issue=6&spage=543},
  acknowledgement = ack-nhfb,
  fjournal = {International Journal of Parallel Programming},
  journal-URL = {http://link.springer.com/journal/10766},
}
@Article{Freeh:2008:JTD,
  author = {Vincent W. Freeh and Nandini Kappiah and David K.
    Lowenthal and Tyler K. Bletsch},
  title = {Just-in-time dynamic voltage scaling: Exploiting
    inter-node slack to save energy in {MPI} programs},
  journal = j-J-PAR-DIST-COMP,
  volume = {68},
  number = {9},
  pages = {1175--1185},
  month = sep,
  year = {2008},
  CODEN = {JPDCER},
  ISSN = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L = {0743-7315},
  bibdate = {Wed Sep 1 16:27:22 MDT 2010},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/07437315},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Fujimoto:2008:DMV,
author = "Noriyuki Fujimoto",
title = "Dense Matrix-Vector Multiplication on the {CUDA}
Architecture",
journal = j-PARALLEL-PROCESS-LETT,
volume = "18",
number = "4",
pages = "511--530",
month = dec,
year = "2008",
CODEN = "PPLTEE",
DOI = "https://doi.org/10.1142/S0129626408003545",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
ISSN-L = "0129-6264",
bibdate = "Thu Sep 2 09:08:11 MDT 2010",
bibsource = "http://ejournals.wspc.com.sg/ppl/;
http://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@Article{Gao:2008:GEI,
author = "Guang R. Gao and Mitsuhisa Sato and Eduard
Ayguad{\'e}",
title = "{Guest Editors'} Introduction: Special Issue on
{OpenMP}",
journal = j-INT-J-PARALLEL-PROG,
volume = "36",
number = "3",
pages = "287--288",
month = jun,
year = "2008",
CODEN = "IJPPE5",
DOI = "https://doi.org/10.1007/s10766-008-0076-3",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Wed Jul 9 16:07:10 MDT 2008",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=36&issue=3;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=36&issue=3&spage=287",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Garland:2008:PCE,
  author = {Michael Garland and Scott {Le Grand} and John Nickolls
    and Joshua Anderson and Jim Hardwick and Scott Morton
    and Everett Phillips and Yao Zhang and Vasily Volkov},
  title = {Parallel Computing Experiences with {CUDA}},
  journal = j-IEEE-MICRO,
  volume = {28},
  number = {4},
  pages = {13--27},
  month = jul # "\slash " # aug,
  year = {2008},
  CODEN = {IEMIDZ},
  DOI = {https://doi.org/10.1109/MM.2008.57},
  ISSN = {0272-1732 (print), 1937-4143 (electronic)},
  ISSN-L = {0272-1732},
  bibdate = {Tue Sep 9 15:18:16 MDT 2008},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE Micro},
  journal-URL = {http://www.computer.org/csdl/mags/mi/index.html},
}
@Article{Genaud:2008:EPC,
  author = {St{\'e}phane Genaud and Pierre Gan{\c{c}}arski and
    Guillaume Latu and Alexandre Blansch{\'e} and Choopan
    Rattanapoka and Damien Vouriot},
  title = {Exploitation of a parallel clustering algorithm on
    commodity hardware with {P2P-MPI}},
  journal = j-J-SUPERCOMPUTING,
  volume = {43},
  number = {1},
  pages = {21--41},
  month = jan,
  year = {2008},
  CODEN = {JOSUED},
  DOI = {https://doi.org/10.1007/s11227-007-0136-2},
  ISSN = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L = {0920-8542},
  bibdate = {Wed Jul 9 17:32:33 MDT 2008},
  bibsource = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=43&issue=1;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=43&issue=1&spage=21},
  acknowledgement = ack-nhfb,
  fjournal = {The Journal of Supercomputing},
  journal-URL = {http://link.springer.com/journal/11227},
  keywords = {Clustering; Evolutionary algorithms; Grid; Java;
    Parallel algorithms},
}
@Article{Gregoretti:2008:MGE,
  author = {F. Gregoretti and G. Laccetti and A. Murli and G.
    Oliva and U. Scafuri},
  title = {{MGF}: a grid-enabled {MPI} library},
  journal = j-FUT-GEN-COMP-SYS,
  volume = {24},
  number = {2},
  pages = {158--165},
  month = feb,
  year = {2008},
  CODEN = {FGSEVI},
  ISSN = {0167-739X (print), 1872-7115 (electronic)},
  ISSN-L = {0167-739X},
  bibdate = {Sat Sep 11 13:08:11 MDT 2010},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/0167739X},
  acknowledgement = ack-nhfb,
  fjournal = {Future Generation Computer Systems},
  journal-URL = {http://www.sciencedirect.com/science/journal/0167739X},
}
@Article{Ha:2008:NBP,
author = "Phuong Hoai Ha and Philippas Tsigas and Otto J.
Anshus",
title = "Non-blocking programming on multi-core graphics
processors: (extended abstract)",
journal = j-COMP-ARCH-NEWS,
volume = "36",
number = "5",
pages = "19--28",
month = dec,
year = "2008",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/1556444.1556448",
ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
ISSN-L = "0163-5964",
bibdate = "Fri Jun 26 11:50:56 MDT 2009",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper investigates the synchronization power of
coalesced memory accesses, a family of memory access
mechanisms introduced in recent large multicore
architectures like the CUDA graphics processors. We
first design three memory access models to capture the
fundamental features of the new memory access
mechanisms. Subsequently, we prove the exact
synchronization power of these models in terms of their
consensus numbers. These tight results show that the
coalesced memory access mechanisms can facilitate
strong synchronization between the threads of multicore
processors, without the need of synchronization
primitives other than reads and writes.\par
Moreover, based on the intrinsic features of recent GPU
architectures, we construct strong synchronization
objects like wait-free and t-resilient
read-modify-write objects for a general model of recent
GPU architectures without strong hardware
synchronization primitives like test-and-set and
compare-and-swap. Accesses to the wait-free objects
have time complexity $ O(N) $, where $N$ is the number
of processes. Our result demonstrates that it is
possible to construct wait-free synchronization
mechanisms for GPUs without the need of strong
synchronization primitives in hardware and that
wait-free programming is possible for GPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
}
@Article{Hou:2008:BBS,
author = "Qiming Hou and Kun Zhou and Baining Guo",
title = "{BSGP}: bulk-synchronous {GPU} programming",
journal = j-TOG,
volume = "27",
number = "3",
pages = "19:1--19:??",
month = aug,
year = "2008",
CODEN = "ATGRDF",
DOI = "https://doi.org/10.1145/1360612.1360618",
ISSN = "0730-0301 (print), 1557-7368 (electronic)",
ISSN-L = "0730-0301",
bibdate = "Tue Aug 12 13:40:36 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tog/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/tog.bib",
abstract = "We present BSGP, a new programming language for
general purpose computation on the GPU. A BSGP program
looks much the same as a sequential C program.
Programmers only need to supply a bare minimum of extra
information to describe parallel processing on GPUs. As
a result, BSGP programs are easy to read, write, and
maintain. Moreover, the ease of programming does not
come at the cost of performance. A well-designed BSGP
compiler converts BSGP programs to kernels and combines
them using optimally allocated temporary streams. In
our benchmark, BSGP programs achieve similar or better
performance than well-optimized CUDA programs, while
the source code complexity and programming time are
significantly reduced. To test BSGP's code efficiency
and ease of programming, we implemented a variety of
GPU applications, including a highly sophisticated X3D
parser that would be extremely difficult to develop
with existing GPU programming languages.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Graphics",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J778",
keywords = "bulk synchronous parallel programming; programmable
graphics hardware; stream processing; thread
manipulation",
}
@InCollection{Howes:2008:U,
  author = {L. Howes and D. B. Thomas},
  title = {Efficient Random Number Generation and Application
    Using {CUDA}},
  crossref = {Nguyen:2008:GG},
  chapter = {37},
  pages = {805--830},
  year = {2008},
  bibdate = {Sat Feb 08 18:40:34 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/prng.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  keywords = {random-number generation},
}
@Article{Huang:2008:FPM,
  author = {Jih-Woei Huang and Chih-Ping Chu},
  title = {A flexible processor mapping technique toward data
    localization for block-cyclic data redistribution},
  journal = j-J-SUPERCOMPUTING,
  volume = {45},
  number = {2},
  pages = {151--172},
  month = aug,
  year = {2008},
  CODEN = {JOSUED},
  DOI = {https://doi.org/10.1007/s11227-007-0166-9},
  ISSN = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L = {0920-8542},
  bibdate = {Wed Jul 9 17:32:35 MDT 2008},
  bibsource = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=45&issue=2;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=45&issue=2&spage=151},
  acknowledgement = ack-nhfb,
  fjournal = {The Journal of Supercomputing},
  journal-URL = {http://link.springer.com/journal/11227},
  keywords = {Data distribution; Data-parallel programming;
    Distributed memory multicomputers; HPF; MPI; Processor
    mapping},
}
@Article{Jeun:2008:OPB,
  author = {Woo-Chul Jeun and Yang-Suk Kee and Soonhoi Ha and
    Changdon Kee},
  title = {Overcoming performance bottlenecks in using {OpenMP}
    on {SMP} clusters},
  journal = j-PARALLEL-COMPUTING,
  volume = {34},
  number = {10},
  pages = {570--592},
  month = oct,
  year = {2008},
  CODEN = {PACOEJ},
  ISSN = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L = {0167-8191},
  bibdate = {Thu Sep 2 17:51:09 MDT 2010},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Parallel Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Jin:2008:PEM,
  author = {Haoqiang Jin and Barbara Chapman and Lei Huang and
    Dieter an Mey and Thomas Reichstein},
  title = {Performance Evaluation of a Multi-Zone Application in
    Different {OpenMP} Approaches},
  journal = j-INT-J-PARALLEL-PROG,
  volume = {36},
  number = {3},
  pages = {312--325},
  month = jun,
  year = {2008},
  CODEN = {IJPPE5},
  DOI = {https://doi.org/10.1007/s10766-008-0074-5},
  ISSN = {0885-7458 (print), 1573-7640 (electronic)},
  ISSN-L = {0885-7458},
  bibdate = {Wed Jul 9 16:07:10 MDT 2008},
  bibsource = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=36&issue=3;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=36&issue=3&spage=312},
  acknowledgement = ack-nhfb,
  fjournal = {International Journal of Parallel Programming},
  journal-URL = {http://link.springer.com/journal/10766},
  keywords = {Multi-level parallelism; OpenMP extensions;
    Performance evaluation},
}
@Article{Kwon:2008:RPP,
  author = {Seongnam Kwon and Yongjoo Kim and Woo-Chul Jeun and
    Soonhoi Ha and Yunheung Paek},
  title = {A retargetable parallel-programming framework for
    {MPSoC}},
  journal = j-TODAES,
  volume = {13},
  number = {3},
  pages = {39:1--39:??},
  month = jul,
  year = {2008},
  CODEN = {ATASFO},
  DOI = {https://doi.org/10.1145/1367045.1367048},
  ISSN = {1084-4309 (print), 1557-7309 (electronic)},
  ISSN-L = {1084-4309},
  bibdate = {Tue Aug 5 18:41:27 MDT 2008},
  bibsource = {http://www.acm.org/pubs/contents/journals/todaes/;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract = {As more processing elements are integrated in a single
    chip, embedded software design becomes more
    challenging: It becomes a parallel programming for
    nontrivial heterogeneous multiprocessors with diverse
    communication architectures, and design constraints
    such as hardware cost, power, and timeliness. In the
    current practice of parallel programming with MPI or
    OpenMP, the programmer should manually optimize the
    parallel code for each target architecture and for the
    design constraints. Thus, the design-space exploration
    of MPSoC (multiprocessor systems-on-chip) costs become
    prohibitively large as software development overhead
    increases drastically. To solve this problem, we
    develop a parallel-programming framework based on a
    novel programming model called common intermediate code
    (CIC). In a CIC, functional parallelism and data
    parallelism of application tasks are specified
    independently of the target architecture and design
    constraints. Then, the CIC translator translates the
    CIC into the final parallel code, considering the
    target architecture and design constraints to make the
    CIC retargetable. Experiments with preliminary
    examples, including the H.263 decoder, show that the
    proposed parallel-programming framework increases the
    design productivity of MPSoC software significantly.},
  acknowledgement = ack-nhfb,
  articleno = {39},
  fjournal = {ACM Transactions on Design Automation of Electronic
    Systems (TODAES)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J776},
  keywords = {design-space exploration; embedded software;
    multiprocessor system on chip; parallel-programming;
    software generation},
}
@Article{Liu:2008:AMD,
  author = {Weiguo Liu and Bertil Schmidt and Gerrit Voss and
    Wolfgang M{\"u}ller-Wittig},
  title = {Accelerating molecular dynamics simulations using
    Graphics Processing Units with {CUDA}},
  journal = j-COMP-PHYS-COMM,
  volume = {179},
  number = {9},
  pages = {634--641},
  day = {1},
  month = nov,
  year = {2008},
  CODEN = {CPHCBZ},
  DOI = {https://doi.org/10.1016/j.cpc.2008.05.008},
  ISSN = {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L = {0010-4655},
  bibdate = {Mon Feb 13 23:42:37 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0010465508002191},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-URL = {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Luckow:2008:MFT,
  author = {Andr{\'e} Luckow and Bettina Schnor},
  title = {{Migol}: a fault-tolerant service framework for {MPI}
    applications in the {Grid}},
  journal = j-FUT-GEN-COMP-SYS,
  volume = {24},
  number = {2},
  pages = {142--152},
  month = feb,
  year = {2008},
  CODEN = {FGSEVI},
  ISSN = {0167-739X (print), 1872-7115 (electronic)},
  ISSN-L = {0167-739X},
  bibdate = {Sat Sep 11 13:08:11 MDT 2010},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/0167739X},
  acknowledgement = ack-nhfb,
  fjournal = {Future Generation Computer Systems},
  journal-URL = {http://www.sciencedirect.com/science/journal/0167739X},
}
@Article{Milovanovic:2008:NEE,
  author = {Milos Milovanovi{\'c} and Roger Ferrer and Vladimir
    Gajinov and Osman S. Unsal and Adrian Cristal and
    Eduard Ayguad{\'e} and Mateo Valero},
  title = {{Nebelung}: Execution Environment for Transactional
    {OpenMP}},
  journal = j-INT-J-PARALLEL-PROG,
  volume = {36},
  number = {3},
  pages = {326--346},
  month = jun,
  year = {2008},
  CODEN = {IJPPE5},
  DOI = {https://doi.org/10.1007/s10766-008-0073-6},
  ISSN = {0885-7458 (print), 1573-7640 (electronic)},
  ISSN-L = {0885-7458},
  bibdate = {Wed Jul 9 16:07:10 MDT 2008},
  bibsource = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=36&issue=3;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=36&issue=3&spage=326},
  acknowledgement = ack-nhfb,
  fjournal = {International Journal of Parallel Programming},
  journal-URL = {http://link.springer.com/journal/10766},
  keywords = {Compiler; OpenMP; Runtime system; Software
    Transactional Memory},
}
@Article{Nickolls:2008:SPP,
  author = {John Nickolls and Ian Buck and Michael Garland and
    Kevin Skadron},
  title = {Scalable parallel programming with {CUDA}},
  journal = j-QUEUE,
  volume = {6},
  number = {2},
  pages = {40--53},
  month = mar,
  year = {2008},
  CODEN = {AQCUAE},
  DOI = {https://doi.org/10.1145/1365490.1365500},
  ISSN = {1542-7730 (print), 1542-7749 (electronic)},
  ISSN-L = {1542-7730},
  bibdate = {Fri Jun 20 11:18:38 MDT 2008},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/queue.bib},
  abstract = {Is CUDA the parallel programming model that
    application developers have been waiting for?},
  acknowledgement = ack-nhfb,
  fjournal = {ACM Queue: Tomorrow's Computing Today},
}
@Article{Noble:2008:GMY,
  author = {Michael S. Noble},
  title = {Getting more from your multicore: exploiting {OpenMP}
    from an open-source numerical scripting language},
  journal = j-CCPE,
  volume = {20},
  number = {16},
  pages = {1877--1891},
  month = nov,
  year = {2008},
  CODEN = {CCPEBO},
  DOI = {https://doi.org/10.1002/cpe.1296},
  ISSN = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L = {1532-0626},
  bibdate = {Mon Dec 5 10:08:28 MST 2011},
  bibsource = {http://www.interscience.wiley.com/jpages/1532-0626;
    http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Prac\-tice and
    Experience},
  journal-URL = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate = {4 Jun 2008},
}
@Article{OBrien:2008:SOC,
author = "Kevin O'Brien and Kathryn O'Brien and Zehra Sura
and Tong Chen and Tao Zhang",
title = "Supporting {OpenMP} on {Cell}",
journal = j-INT-J-PARALLEL-PROG,
volume = "36",
number = "3",
pages = "289--311",
month = jun,
year = "2008",
CODEN = "IJPPE5",
DOI = "https://doi.org/10.1007/s10766-008-0072-7",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Wed Jul 9 16:07:10 MDT 2008",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=36&issue=3;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=36&issue=3&spage=289",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
keywords = "Data transfer; Heterogeneous architecture; OpenMP;
Thread synchronization",
}
@Article{Patrick:2008:CEO,
  author = {Christina M. Patrick and SeungWoo Son and Mahmut
    Kandemir},
  title = {Comparative evaluation of overlap strategies with
    study of {I/O} overlap in {MPI-IO}},
  journal = j-OPER-SYS-REV,
  volume = {42},
  number = {6},
  pages = {43--49},
  month = oct,
  year = {2008},
  CODEN = {OSRED8},
  DOI = {https://doi.org/10.1145/1453775.1453784},
  ISSN = {0163-5980 (print), 1943-586X (electronic)},
  ISSN-L = {0163-5980},
  bibdate = {Thu Oct 23 14:23:29 MDT 2008},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract = {Many scientific applications use parallel I/O to meet
    the low latency and high bandwidth I/O requirement.
    Among many available parallel I/O operations,
    collective I/O is one of the most popular methods when
    the storage layouts and access patterns of data do not
    match. The implementation of collective I/O typically
    involves disk I/O operations followed by interprocessor
    communications. Also, in many I/O-intensive
    applications, parallel I/O operations are usually
    followed by parallel computations. This paper presents
    a comparative study of different overlap strategies in
    parallel applications. We have experimented with four
    different overlap strategies (1) Overlapping I/O and
    communication; (2) Overlapping I/O and computation; (3)
    Overlapping computation and communication; and (4)
    Overlapping I/O, communication, and computation. All
    experiments have been conducted on a Linux Cluster and
    the performance results obtained are very encouraging.
    On an average, we have enhanced the performance of a
    generic collective read call by 38\%, the MxM benchmark
    by 26\%, and the FFT benchmark by 34\%.},
  acknowledgement = ack-nhfb,
  fjournal = {Operating Systems Review},
}
@Article{Rodriguez:2008:FTS,
  author = {Gabriel Rodr{\'\i}guez and Xo{\'a}n C. Pardo and
    Mar{\'\i}a J. Mart{\'\i}n and Patricia Gonz{\'a}lez and
    Daniel D{\'\i}az},
  title = {A Fault Tolerance Solution for Sequential and {MPI}
    Applications on the {Grid}},
  journal = j-SCPE,
  volume = {9},
  number = {2},
  pages = {101--109},
  month = jun,
  year = {2008},
  CODEN = {????},
  ISSN = {1895-1767},
  bibdate = {Thu Sep 2 11:55:11 MDT 2010},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.scpe.org/content/9/2.toc},
  URL = {http://www.scpe.org/vols/vol09/no2/SCPE_9_2_03.pdf;
    http://www.scpe.org/vols/vol09/no2/SCPE_9_2_03.zip},
  acknowledgement = ack-nhfb,
}
@Article{Rolfe:2008:PFO,
  author = {Timothy J. Rolfe},
  title = {Perverse and foolish oft {I} strayed},
  journal = j-SIGCSE,
  volume = {40},
  number = {2},
  pages = {52--55},
  month = jun,
  year = {2008},
  CODEN = {SIGSD3},
  DOI = {https://doi.org/10.1145/1383602.1383634},
  ISSN = {0097-8418 (print), 2331-3927 (electronic)},
  ISSN-L = {0097-8418},
  bibdate = {Sat Nov 17 15:44:13 MST 2012},
  bibsource = {DBLP;
    http://dblp.uni-trier.de/db/journals/sigcse/sigcse40.html#Rolfe08;
    http://portal.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigcse2000.bib},
  URL = {ftp://ftp.math.utah.edu/pub/mirrors/ftp.ira.uka.de/bibliography/Misc/DBLP/2008.bib},
  abstract = {This uses a massively wrong-headed algorithm for
    sorting to exemplify the use of the backtracking
    strategy and the branch-and-bound strategy. In
    addition, brief notes are included on parallel
    processing approaches: Java threads on multi-core
    computers and distributed processing through such
    message passing systems as PVM and MPI.},
  acknowledgement = ack-nhfb,
  fjournal = {SIGCSE Bulletin (ACM Special Interest Group on
    Computer Science Education)},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J688},
}
% NOTE(review): the abstract's "[1]" is a citation marker from the source
% paper's own reference list; it does not refer to anything in this file.
@Article{Rolfe:2008:SMA,
  author = "Timothy J. Rolfe",
  title = "A specimen {MPI} application: {$N$}-Queens in
    parallel",
  journal = j-SIGCSE,
  volume = "40",
  number = "4",
  pages = "42--45",
  month = dec,
  year = "2008",
  CODEN = "SIGSD3",
  DOI = "https://doi.org/10.1145/1473195.1473217",
  ISSN = "0097-8418 (print), 2331-3927 (electronic)",
  ISSN-L = "0097-8418",
  bibdate = "Sat Nov 17 15:44:17 MST 2012",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigcse2000.bib",
  abstract = "The generalized problem of placing $n$ queens on an
    $n$-by-$n$ board provides an ``embarrassingly parallel''
    problem for parallel solution. This paper expands on
    the discussion presented in the May 2005 issue of Dr.
    Dobb's Journal [1], specifically taking the parallel
    execution through Java threads and bringing it into an
    application in C taking advantage of MPI.",
  acknowledgement = ack-nhfb,
  fjournal = "SIGCSE Bulletin (ACM Special Interest Group on
    Computer Science Education)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J688",
}
@Article{Sala:2008:PHP,
  author = "Marzio Sala and W. F. Spotz and M. A. Heroux",
  title = "{PyTrilinos}: {High-performance} distributed-memory
    solvers for {Python}",
  journal = j-TOMS,
  volume = "34",
  number = "2",
  pages = "7:1--7:33",
  month = mar,
  year = "2008",
  CODEN = "ACMSCU",
  DOI = "https://doi.org/10.1145/1326548.1326549",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L = "0098-3500",
  bibdate = "Thu Jun 12 12:47:31 MDT 2008",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "PyTrilinos is a collection of Python modules that are
    useful for serial and parallel scientific computing.
    This collection contains modules that cover serial and
    parallel dense linear algebra, serial and parallel
    sparse linear algebra, direct and iterative linear
    solution techniques, domain decomposition and
    multilevel preconditioners, nonlinear solvers, and
    continuation algorithms. Also included are a variety of
    related utility functions and classes, including
    distributed I/O, coloring algorithms, and matrix
    generation. PyTrilinos vector objects are integrated
    with the popular NumPy Python module, gathering
    together a variety of high-level distributed computing
    operations with serial vector
    operations.\par
    PyTrilinos is a set of interfaces to existing, compiled
    libraries. This hybrid framework uses Python as
    front-end, and efficient precompiled libraries for all
    computationally expensive tasks. Thus, we take
    advantage of both the flexibility and ease of use of
    Python, and the efficiency of the underlying C++, C,
    and FORTRAN numerical kernels. Our numerical results
    show that, for many important problem classes, the
    overhead required by the Python interpreter is
    negligible.\par
    To run in parallel, PyTrilinos simply requires a
    standard Python interpreter. The fundamental MPI calls
    are encapsulated under an abstract layer that manages
    all interprocessor communications. This makes serial
    and parallel scripts using PyTrilinos virtually
    identical.",
  acknowledgement = ack-nhfb,
  articleno = "7",
  fjournal = "ACM Transactions on Mathematical Software",
  journal-URL = "http://dl.acm.org/pub.cfm?id=J782",
  keywords = "direct solvers; multilevel preconditioners; nonlinear
    solvers; object-oriented programming; script
    languages",
}
% NOTE(review): the leading "Illumination:" in the title may be an issue
% section heading rather than part of the paper title -- confirm against
% the publisher record before relying on it.
@Article{Schmitz:2008:IIG,
  author = "Arne Schmitz and Markus Tavenrath and Leif Kobbelt",
  title = "Illumination: Interactive Global Illumination for
    Deformable Geometry in {CUDA}",
  journal = j-CGF,
  volume = "27",
  number = "7",
  pages = "1979--1986",
  month = oct,
  year = "2008",
  CODEN = "CGFODY",
  DOI = "https://doi.org/10.1111/j.1467-8659.2008.01347.x",
  ISSN = "0167-7055 (print), 1467-8659 (electronic)",
  ISSN-L = "0167-7055",
  bibdate = "Sat May 11 13:27:05 MDT 2013",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/cgf.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Com{\-}pu{\-}ter Graphics Forum",
  journal-URL = "http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1467-8659/",
  onlinedate = "23 Jan 2009",
}
% The pages field ends in a "??" placeholder: the final page of this
% article-numbered paper is not recorded.
@Article{Siegel:2008:CSE,
  author = "Stephen F. Siegel and Anastasia Mironova and George S.
    Avrunin and Lori A. Clarke",
  title = "Combining symbolic execution with model checking to
    verify parallel numerical programs",
  journal = j-TOSEM,
  volume = "17",
  number = "2",
  pages = "10:1--10:??",
  month = apr,
  year = "2008",
  CODEN = "ATSMER",
  DOI = "https://doi.org/10.1145/1348250.1348256",
  ISSN = "1049-331X (print), 1557-7392 (electronic)",
  ISSN-L = "1049-331X",
  bibdate = "Mon Jun 16 11:13:13 MDT 2008",
  bibsource = "http://www.acm.org/pubs/contents/journals/tosem/;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "We present a method to verify the correctness of
    parallel programs that perform complex numerical
    computations, including computations involving
    floating-point arithmetic. This method requires that a
    sequential version of the program be provided, to serve
    as the specification for the parallel one. The key idea
    is to use model checking, together with symbolic
    execution, to establish the equivalence of the two
    programs. In this approach the path condition from
    symbolic execution of the sequential program is used to
    constrain the search through the parallel program. To
    handle floating-point operations, three different types
    of equivalence are supported. Several examples are
    presented, demonstrating the approach and actual errors
    that were found. Limitations and directions for future
    research are also described.",
  acknowledgement = ack-nhfb,
  articleno = "10",
  fjournal = "ACM Transactions on Software Engineering and
    Methodology",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J790",
  keywords = "concurrency; Finite-state verification;
    floating-point; high performance computing; Message
    Passing Interface; model checking; MPI; numerical
    program; parallel programming; Spin; symbolic
    execution",
}
@Article{Valencia:2008:PPR,
  author = "David Valencia and Alexey Lastovetsky and Maureen
    O'Flynn and Antonio Plaza and Javier Plaza",
  title = "Parallel Processing of Remotely Sensed Hyperspectral
    Images on Heterogeneous Networks of Workstations Using
    {HeteroMPI}",
  journal = j-IJHPCA,
  volume = "22",
  number = "4",
  pages = "386--407",
  month = nov,
  year = "2008",
  CODEN = "IHPCFL",
  DOI = "https://doi.org/10.1177/1094342007088377",
  ISSN = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L = "1094-3420",
  bibdate = "Tue Aug 31 09:59:45 MDT 2010",
  bibsource = "http://hpc.sagepub.com/content/22/4.toc;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://hpc.sagepub.com/content/22/4/386.full.pdf+html",
  acknowledgement = ack-nhfb,
  fjournal = "The International Journal of High Performance
    Computing Applications",
  journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{VanZee:2008:SPF,
  author = "Field G. {Van Zee} and Paolo Bientinesi and Tze Meng
    Low and Robert A. van de Geijn",
  title = "Scalable parallelization of {FLAME} code via the
    workqueuing model",
  journal = j-TOMS,
  volume = "34",
  number = "2",
  pages = "10:1--10:29",
  month = mar,
  year = "2008",
  CODEN = "ACMSCU",
  DOI = "https://doi.org/10.1145/1326548.1326552",
  ISSN = "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L = "0098-3500",
  bibdate = "Thu Jun 12 12:47:31 MDT 2008",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "We discuss the OpenMP parallelization of linear
    algebra algorithms that are coded using the Formal
    Linear Algebra Methods Environment (FLAME) API. This
    API expresses algorithms at a higher level of
    abstraction, avoids the use of loop and array indices,
    and represents these algorithms as they are formally
    derived and presented. We report on two implementations
    of the workqueuing model, neither of which requires the
    use of explicit indices to specify parallelism. The
    first implementation uses the experimental taskq
    pragma, which may influence the adoption of a similar
    construct into OpenMP 3.0. The second workqueuing
    implementation is domain-specific to FLAME but allows
    us to illustrate the benefits of sorting tasks
    according to their computational cost prior to parallel
    execution. In addition, we discuss how scalable
    parallelization of dense linear algebra algorithms via
    OpenMP will require a two-dimensional partitioning of
    operands much like a 2D data distribution is needed on
    distributed memory architectures. We illustrate the
    issues and solutions by discussing the parallelization
    of the symmetric rank-$k$ update and report impressive
    performance on an SGI system with 14 Itanium2
    processors.",
  acknowledgement = ack-nhfb,
  articleno = "10",
  fjournal = "ACM Transactions on Mathematical Software",
  journal-URL = "http://dl.acm.org/pub.cfm?id=J782",
  keywords = "FLAME; OpenMP; parallel; scalability; SMP;
    workqueuing",
}
% NOTE(review): the abstract's "[4]" and "[12]" are citation markers from
% the source paper's own reference list; they do not refer to this file.
@Article{Wang:2008:PIM,
  author = "Kun Wang and Yu Zhang and Huayong Wang and Xiaowei
    Shen",
  title = "Parallelization of {IBM Mambo} system simulator in
    functional modes",
  journal = j-OPER-SYS-REV,
  volume = "42",
  number = "1",
  pages = "71--76",
  month = jan,
  year = "2008",
  CODEN = "OSRED8",
  DOI = "https://doi.org/10.1145/1341312.1341325",
  ISSN = "0163-5980 (print), 1943-586X (electronic)",
  ISSN-L = "0163-5980",
  bibdate = "Fri Jun 20 17:19:29 MDT 2008",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "Mambo [4] is IBM's full-system simulator which models
    PowerPC systems, and provides a complete set of
    simulation tools to help IBM and its partners in
    pre-hardware development and performance evaluation for
    future systems. Currently Mambo simulates target
    systems on a single host thread. When the number of
    cores increases in a target system, Mambo's simulation
    performance for each core goes down. As the so-called
    `multi-core era' approaches, both target and host
    systems will have more and more cores. It is very
    important for Mambo to efficiently simulate a
    multi-core target system on a multi-core host system.
    Parallelization is a natural method to speed up Mambo
    under this situation.\par
    Parallel Mambo (P-Mambo) is a multi-threaded
    implementation of Mambo. Mambo's simulation engine is
    implemented as a user-level thread-scheduler. We
    propose a multi-scheduler method to adapt Mambo's
    simulation engine to multi-threaded execution. Based on
    this method a core-based module partition is proposed
    to achieve both high inter-scheduler parallelism and
    low inter-scheduler dependency. Protection of shared
    resources is crucial to both correctness and
    performance of P-Mambo. Since there are two tiers of
    threads in P-Mambo, protecting shared resources by only
    OS-level locks possibly introduces deadlocks due to
    user-level context switch. We propose a new lock
    mechanism to handle this problem. Since Mambo is an
    on-going project with many modules currently under
    development, co-existence with new modules is also
    important to P-Mambo. We propose a global-lock-based
    method to guarantee compatibility of P-Mambo with
    future Mambo modules.\par
    We have implemented the first version of P-Mambo in
    functional modes. The performance of P-Mambo has been
    evaluated on the OpenMP implementation of NAS Parallel
    Benchmark (NPB) 3.2 [12]. Preliminary experimental
    results show that P-Mambo achieves an average speedup
    of 3.4 on a 4-core host machine.",
  acknowledgement = ack-nhfb,
  fjournal = "Operating Systems Review",
  keywords = "architectural simulation; dynamic binary translation;
    parallel simulation",
}
% Same paper as Wegiel:2008:MCVb and Wegiel:2008:MCVc (identical authors,
% pages, DOI, and abstract); this record files it under
% ACM SIGARCH Computer Architecture News.
% NOTE(review): all three records carry the same DOI string -- confirm
% whether each journal issue should have its own DOI.
@Article{Wegiel:2008:MCVa,
  author = "Michal Wegiel and Chandra Krintz",
  title = "The {Mapping Collector}: virtual memory support for
    generational, parallel, and concurrent compaction",
  journal = j-COMP-ARCH-NEWS,
  volume = "36",
  number = "1",
  pages = "91--102",
  month = mar,
  year = "2008",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/1353535.1346294",
  ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L = "0163-5964",
  bibdate = "Tue Jun 17 11:51:35 MDT 2008",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "Parallel and concurrent garbage collectors are
    increasingly employed by managed runtime environments
    (MREs) to maintain scalability, as multi-core
    architectures and multi-threaded applications become
    pervasive. Moreover, state-of-the-art MREs commonly
    implement compaction to eliminate heap fragmentation
    and enable fast linear object allocation.\par
    Our empirical analysis of object demographics reveals
    that unreachable objects in the heap tend to form
    clusters large enough to be effectively managed at the
    granularity of virtual memory pages. Even though
    processes can manipulate the mapping of the virtual
    address space through the standard operating system
    (OS) interface on most platforms, extant
    parallel/concurrent compactors do not do so to exploit
    this clustering behavior and instead achieve compaction
    by performing, relatively expensive, object moving and
    pointer adjustment.\par
    We introduce the Mapping Collector (MC), which
    leverages virtual memory operations to reclaim and
    consolidate free space without moving objects and
    updating pointers. MC is a nearly-single-phase
    compactor that is simpler and more efficient than
    previously reported compactors that comprise two to
    four phases. Through effective MRE-OS coordination, MC
    maintains the simplicity of a non-moving collector
    while providing efficient parallel and concurrent
    compaction.\par
    We implement both stop-the-world and concurrent MC in a
    generational garbage collection framework within the
    open-source HotSpot Java Virtual Machine. Our
    experimental evaluation using a multiprocessor
    indicates that MC significantly increases throughput
    and scalability as well as reduces pause times,
    relative to state-of-the-art, parallel and concurrent
    compactors.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
  keywords = "compaction; concurrent; parallel; virtual memory",
}
% Same paper as Wegiel:2008:MCVa and Wegiel:2008:MCVc (identical authors,
% pages, DOI, and abstract); this record files it under
% Operating Systems Review.
% NOTE(review): all three records carry the same DOI string -- confirm
% whether each journal issue should have its own DOI.
@Article{Wegiel:2008:MCVb,
  author = "Michal Wegiel and Chandra Krintz",
  title = "The {Mapping Collector}: virtual memory support for
    generational, parallel, and concurrent compaction",
  journal = j-OPER-SYS-REV,
  volume = "42",
  number = "2",
  pages = "91--102",
  month = mar,
  year = "2008",
  CODEN = "OSRED8",
  DOI = "https://doi.org/10.1145/1353535.1346294",
  ISSN = "0163-5980 (print), 1943-586X (electronic)",
  ISSN-L = "0163-5980",
  bibdate = "Fri Jun 20 17:20:12 MDT 2008",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "Parallel and concurrent garbage collectors are
    increasingly employed by managed runtime environments
    (MREs) to maintain scalability, as multi-core
    architectures and multi-threaded applications become
    pervasive. Moreover, state-of-the-art MREs commonly
    implement compaction to eliminate heap fragmentation
    and enable fast linear object allocation.\par
    Our empirical analysis of object demographics reveals
    that unreachable objects in the heap tend to form
    clusters large enough to be effectively managed at the
    granularity of virtual memory pages. Even though
    processes can manipulate the mapping of the virtual
    address space through the standard operating system
    (OS) interface on most platforms, extant
    parallel/concurrent compactors do not do so to exploit
    this clustering behavior and instead achieve compaction
    by performing, relatively expensive, object moving and
    pointer adjustment.\par
    We introduce the Mapping Collector (MC), which
    leverages virtual memory operations to reclaim and
    consolidate free space without moving objects and
    updating pointers. MC is a nearly-single-phase
    compactor that is simpler and more efficient than
    previously reported compactors that comprise two to
    four phases. Through effective MRE-OS coordination, MC
    maintains the simplicity of a non-moving collector
    while providing efficient parallel and concurrent
    compaction.\par
    We implement both stop-the-world and concurrent MC in a
    generational garbage collection framework within the
    open-source HotSpot Java Virtual Machine. Our
    experimental evaluation using a multiprocessor
    indicates that MC significantly increases throughput
    and scalability as well as reduces pause times,
    relative to state-of-the-art, parallel and concurrent
    compactors.",
  acknowledgement = ack-nhfb,
  fjournal = "Operating Systems Review",
  keywords = "compaction; concurrent; parallel; virtual memory",
}
% Same paper as Wegiel:2008:MCVa and Wegiel:2008:MCVb (identical authors,
% pages, DOI, and abstract); this record files it under
% ACM SIGPLAN Notices.
% NOTE(review): all three records carry the same DOI string -- confirm
% whether each journal issue should have its own DOI.
@Article{Wegiel:2008:MCVc,
  author = "Michal Wegiel and Chandra Krintz",
  title = "The {Mapping Collector}: virtual memory support for
    generational, parallel, and concurrent compaction",
  journal = j-SIGPLAN,
  volume = "43",
  number = "3",
  pages = "91--102",
  month = mar,
  year = "2008",
  CODEN = "SINODQ",
  DOI = "https://doi.org/10.1145/1353535.1346294",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Wed Jun 18 11:03:40 MDT 2008",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "Parallel and concurrent garbage collectors are
    increasingly employed by managed runtime environments
    (MREs) to maintain scalability, as multi-core
    architectures and multi-threaded applications become
    pervasive. Moreover, state-of-the-art MREs commonly
    implement compaction to eliminate heap fragmentation
    and enable fast linear object allocation.\par
    Our empirical analysis of object demographics reveals
    that unreachable objects in the heap tend to form
    clusters large enough to be effectively managed at the
    granularity of virtual memory pages. Even though
    processes can manipulate the mapping of the virtual
    address space through the standard operating system
    (OS) interface on most platforms, extant
    parallel/concurrent compactors do not do so to exploit
    this clustering behavior and instead achieve compaction
    by performing, relatively expensive, object moving and
    pointer adjustment.\par
    We introduce the Mapping Collector (MC), which
    leverages virtual memory operations to reclaim and
    consolidate free space without moving objects and
    updating pointers. MC is a nearly-single-phase
    compactor that is simpler and more efficient than
    previously reported compactors that comprise two to
    four phases. Through effective MRE-OS coordination, MC
    maintains the simplicity of a non-moving collector
    while providing efficient parallel and concurrent
    compaction.\par
    We implement both stop-the-world and concurrent MC in a
    generational garbage collection framework within the
    open-source HotSpot Java Virtual Machine. Our
    experimental evaluation using a multiprocessor
    indicates that MC significantly increases throughput
    and scalability as well as reduces pause times,
    relative to state-of-the-art, parallel and concurrent
    compactors.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIG{\-}PLAN Notices",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords = "compaction; concurrent; parallel; virtual memory",
}
@Article{Yang:2008:DPL,
  author = "Chao-Tung Yang and Wen-Chung Shih and Shian-Shyong
    Tseng",
  title = "Dynamic partitioning of loop iterations on
    heterogeneous {PC} clusters",
  journal = j-J-SUPERCOMPUTING,
  volume = "44",
  number = "1",
  pages = "1--23",
  month = apr,
  year = "2008",
  CODEN = "JOSUED",
  DOI = "https://doi.org/10.1007/s11227-007-0146-0",
  ISSN = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L = "0920-8542",
  bibdate = "Wed Jul 9 17:32:34 MDT 2008",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=44&issue=1;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=44&issue=1&spage=1",
  acknowledgement = ack-nhfb,
  fjournal = "The Journal of Supercomputing",
  journal-URL = "http://link.springer.com/journal/11227",
  keywords = "Cluster computing; Heterogeneous; MPI programming;
    Parallel loops; PC clusters; Self-scheduling",
}
% The citation key stays ASCII ("Ayguade"); the author field carries the
% accented spelling also used in Duran:2009:PEO.
@Article{Ayguade:2009:DOT,
  author = "Eduard Ayguad{\'e} and Nawal Copty and Alejandro Duran
    and Jay Hoeflinger and Yuan Lin and Federico Massaioli
    and Xavier Teruel and Priya Unnikrishnan and Guansong
    Zhang",
  title = "The Design of {OpenMP} Tasks",
  journal = j-IEEE-TRANS-PAR-DIST-SYS,
  volume = "20",
  number = "3",
  pages = "404--418",
  month = mar,
  year = "2009",
  CODEN = "ITDSEO",
  DOI = "https://doi.org/10.1109/TPDS.2008.105",
  ISSN = "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L = "1045-9219",
  bibdate = "Thu May 13 12:06:56 2010",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "IEEE Transactions on Parallel and Distributed
    Systems",
  journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@Article{Bikshandi:2009:EPI,
  author = "Ganesh Bikshandi and Jose G. Castanos and Sreedhar B.
    Kodali and V. Krishna Nandivada and Igor Peshansky and
    Vijay A. Saraswat and Sayantan Sur and Pradeep Varma
    and Tong Wen",
  title = "Efficient, portable implementation of asynchronous
    multi-place programs",
  journal = j-SIGPLAN,
  volume = "44",
  number = "4",
  pages = "271--282",
  month = apr,
  year = "2009",
  CODEN = "SINODQ",
  DOI = "https://doi.org/10.1145/1594835.1504215",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Fri Oct 9 08:40:49 MDT 2009",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "The X10 programming language is organized around the
    notion of places (an encapsulation of data and
    activities operating on the data), partitioned global
    address space (PGAS), and asynchronous computation and
    communication.\par
    This paper introduces an expressive subset of X10, Flat
    X10, designed to permit efficient execution across
    multiple single-threaded places with a simple runtime
    and without compromising on the productivity of X10. We
    present the design, implementation and evaluation of a
    compiler and runtime system for Flat X10. The Flat X10
    compiler translates programs into C++ SPMD programs
    communicating using an active messaging infrastructure.
    It uses novel techniques to transform explicitly
    parallel programs into SPMD programs. The runtime
    system is based on IBM's LAPI (Low-level API) and is
    easily portable to other libraries such as GASNet and
    ARMCI.\par
    Our implementation realizes performance comparable to
    hand-written MPI programs for well-known HPC benchmarks
    such as Random Access, Stream, and FFT, on a
    Federation-based cluster of Power5 SMPs (with hundreds
    of processors) and the Blue Gene (with thousands of
    processors). Submissions based on the work presented in
    this paper were co-winners of the 2007 and 2008 HPC
    Challenge Type II Awards.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIG{\-}PLAN Notices",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords = "APGAS; asynchrony; compiler; FFT; HPC; HPC challenge;
    PGAS; random access; runtime; SPMD; stream; X10",
}
% No DOI is recorded for this article; the URL field is the only direct link.
@Article{Bronevetsky:2009:CAC,
  author = "Greg Bronevetsky and John Gyllenhaal and Bronis R. de
    Supinski",
  title = "{CLOMP}: Accurately Characterizing {OpenMP}
    Application Overheads",
  journal = j-INT-J-PARALLEL-PROG,
  volume = "37",
  number = "3",
  pages = "250--265",
  month = jun,
  year = "2009",
  CODEN = "IJPPE5",
  ISSN = "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L = "0885-7458",
  bibdate = "Wed Sep 1 16:06:47 MDT 2010",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=3;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=3&spage=250",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of Parallel Programming",
  journal-URL = "http://link.springer.com/journal/10766",
}
% NOTE(review): month is inferred from the issue number (12 of the 2009
% volume) -- confirm against the publisher's issue listing.
@Article{Cappello:2009:FSI,
  author = "Franck Cappello and Thomas Herault and Jack Dongarra",
  title = "Foreword: Special issue: selected papers from the
    {14th European PVM\slash MPI Users Group Meeting}",
  journal = j-PARALLEL-COMPUTING,
  volume = "35",
  number = "12",
  pages = "571",
  month = dec,
  year = "2009",
  CODEN = "PACOEJ",
  DOI = "https://doi.org/10.1016/j.parco.2009.11.001",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  MRclass = "68-06 (68M10 68M12)",
  MRnumber = "MR2596831",
  bibdate = "Sat Sep 4 17:11:07 2010",
  bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Held in Paris, September 30--October 3, 2007",
  acknowledgement = ack-nhfb,
  fjournal = "Parallel Computing. Systems \& Applications",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
% No DOI is recorded for this article; the URL field is the only direct link.
@Article{Duran:2009:PEO,
  author = "Alejandro Duran and Roger Ferrer and Eduard
    Ayguad{\'e} and Rosa M. Badia and Jesus Labarta",
  title = "A Proposal to Extend the {OpenMP} Tasking Model with
    Dependent Tasks",
  journal = j-INT-J-PARALLEL-PROG,
  volume = "37",
  number = "3",
  pages = "292--305",
  month = jun,
  year = "2009",
  CODEN = "IJPPE5",
  ISSN = "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L = "0885-7458",
  bibdate = "Wed Sep 1 16:06:47 MDT 2010",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=3;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=3&spage=292",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of Parallel Programming",
  journal-URL = "http://link.springer.com/journal/10766",
}
% NOTE(review): ISSN-L is set to the print ISSN, as in the other two-ISSN
% entries of this file -- confirm against the ISSN register.
@Article{Dursun:2009:MPM,
  author = "Hikmet Dursun and Kevin J. Barker and Darren J.
    Kerbyson and Scott Pakin and Richard Seymour and Rajiv
    K. Kalia and Aiichiro Nakano and Priya Vashishta",
  title = "An {MPI} Performance Monitoring Interface for Cell
    Based Compute Nodes",
  journal = j-PARALLEL-PROCESS-LETT,
  volume = "19",
  number = "4",
  pages = "535--552",
  month = dec,
  year = "2009",
  CODEN = "PPLTEE",
  DOI = "https://doi.org/10.1142/S0129626409000407",
  ISSN = "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L = "0129-6264",
  bibdate = "Thu Sep 2 09:08:12 MDT 2010",
  bibsource = "http://ejournals.wspc.com.sg/ppl/;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Parallel Processing Letters",
  journal-URL = "http://www.worldscientific.com/loi/ppl",
}
% The braces around "El Maghraoui" keep the two-word surname together when
% BibTeX splits the name into first and last parts.
@Article{ElMaghraoui:2009:MIM,
  author = "K. {El Maghraoui} and Travis J. Desell and Boleslaw K.
    Szymanski and Carlos A. Varela",
  title = "Malleable iterative {MPI} applications",
  journal = j-CCPE,
  volume = "21",
  number = "3",
  pages = "393--413",
  day = "10",
  month = mar,
  year = "2009",
  CODEN = "CCPEBO",
  DOI = "https://doi.org/10.1002/cpe.1362",
  ISSN = "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L = "1532-0626",
  bibdate = "Mon Dec 5 10:08:30 MST 2011",
  bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
    http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Concurrency and Computation: Prac\-tice and
    Experience",
  journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate = "1 Sep 2008",
}
% No DOI is recorded for this article; the URL field is the only direct link.
@Article{Furlinger:2009:CAE,
  author = "Karl F{\"u}rlinger and Shirley Moore",
  title = "Capturing and Analyzing the Execution Control Flow of
    {OpenMP} Applications",
  journal = j-INT-J-PARALLEL-PROG,
  volume = "37",
  number = "3",
  pages = "266--276",
  month = jun,
  year = "2009",
  CODEN = "IJPPE5",
  ISSN = "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L = "0885-7458",
  bibdate = "Wed Sep 1 16:06:47 MDT 2010",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=3;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=3&spage=266",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of Parallel Programming",
  journal-URL = "http://link.springer.com/journal/10766",
}
% No DOI is recorded for this article; the URL field is the only direct link.
@Article{Genaud:2009:FMP,
  author = "St{\'e}phane Genaud and Emmanuel Jeannot and Choopan
    Rattanapoka",
  title = "Fault-Management in {P2P-MPI}",
  journal = j-INT-J-PARALLEL-PROG,
  volume = "37",
  number = "5",
  pages = "433--461",
  month = oct,
  year = "2009",
  CODEN = "IJPPE5",
  ISSN = "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L = "0885-7458",
  bibdate = "Wed Sep 1 16:06:48 MDT 2010",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=5;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=5&spage=433",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of Parallel Programming",
  journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Giannoutakis:2009:DIP,
  author = "Konstantinos M. Giannoutakis and George A. Gravvanis",
  title = "Design and implementation of parallel approximate
    inverse classes using {OpenMP}",
  journal = j-CCPE,
  volume = "21",
  number = "2",
  pages = "115--131",
  month = feb,
  year = "2009",
  CODEN = "CCPEBO",
  DOI = "https://doi.org/10.1002/cpe.1324",
  ISSN = "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L = "1532-0626",
  bibdate = "Mon Dec 5 10:08:30 MST 2011",
  bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
    http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Concurrency and Computation: Prac\-tice and
    Experience",
  journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate = "6 Jun 2008",
}
% The number field is the LAPACK Working Note number; utknumber holds the
% report identifier "UMINF-09.06".
% NOTE(review): the postal code "S-901" in the address looks truncated --
% confirm against the report's title page.
@TechReport{Granat:2009:NPQ,
  author = "Robert Granat and Bo K{\aa}gstr{\"o}m and Daniel
    Kressner",
  title = "A novel parallel {QR} algorithm for hybrid distributed
    memory {HPC} systems",
  type = "LAPACK Working Note",
  number = "216",
  institution = "Department of Computing Science and HPC2N, Ume{\aa}
    University",
  address = "S-901 Ume{\aa}, Sweden",
  month = apr,
  year = "2009",
  bibdate = "Fri Apr 24 12:25:43 2009",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.netlib.org/lapack/lawnspdf/lawn216.pdf",
  abstract = "A novel variant of the parallel QR algorithm for
    solving dense nonsymmetric eigenvalue problems on
    hybrid distributed high performance computing (HPC)
    systems is presented. For this purpose, we introduce
    the concept of multi-window bulge chain chasing and
    parallelize aggressive early deflation. The
    multi-window approach ensures that most computations
    when chasing chains of bulges are performed in level 3
    BLAS operations, while the aim of aggressive early
    deflation is to speed up the convergence of the QR
    algorithm. Mixed MPI-OpenMP coding techniques are
    utilized for porting the codes to distributed memory
    platforms with multithreaded nodes, such as multicore
    processors. Numerous numerical experiments confirm the
    superior performance of our parallel QR algorithm in
    comparison with the existing ScaLAPACK code, leading to
    an implementation that is one to two orders of
    magnitude faster for sufficiently large problems,
    including a number of examples from applications.",
  acknowledgement = ack-nhfb,
  keywords = "aggressive early deflation; bulge chasing; Eigenvalue
    problem; hybrid distributed memory systems; level 3
    performance; multishift; nonsymmetric QR algorithm;
    parallel algorithms; parallel computations",
  utknumber = "UMINF-09.06",
}
% No DOI is recorded for this article; the URL field is the only direct link.
@Article{Gravvanis:2009:OBP,
  author = "George A. Gravvanis",
  title = "{OpenMP} based parallel normalized direct methods for
    sparse finite element linear systems",
  journal = j-J-SUPERCOMPUTING,
  volume = "47",
  number = "1",
  pages = "44--52",
  month = jan,
  year = "2009",
  CODEN = "JOSUED",
  ISSN = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L = "0920-8542",
  bibdate = "Wed Aug 25 08:38:28 MDT 2010",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=47&issue=1;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=47&issue=1&spage=44",
  acknowledgement = ack-nhfb,
  fjournal = "The Journal of Supercomputing",
  journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Hadjidoukas:2009:HPF,
  author =       "P. E. Hadjidoukas and V. V. Dimakopoulos and M.
                 Delakis and C. Garcia",
  title =        "A high-performance face detection system using
                 {OpenMP}",
  journal =      j-CCPE,
  volume =       "21",
  number =       "15",
  pages =        "1819--1837",
  month =        oct,
  year =         "2009",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.1389",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Dec 5 10:08:38 MST 2011",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626;
                 http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Prac\-tice and
                 Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "26 Mar 2009",
}
@Article{He:2009:AVS,
  author =       "Jian He and Layne T. Watson and Masha Sosonkina",
  title =        "{Algorithm 897}: {VTDIRECT95}: {Serial} and parallel
                 codes for the global optimization algorithm {DIRECT}",
  journal =      j-TOMS,
  volume =       "36",
  number =       "3",
  pages =        "17:1--17:24",
  month =        jul,
  year =         "2009",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/1527286.1527291",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Tue Jul 21 14:09:07 MDT 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  note =         "See remark \cite{Sosonkina:2015:RAV}.",
  abstract =     "VTDIRECT95 is a Fortran 95 implementation of D. R.
                 Jones' deterministic global optimization algorithm
                 called {\em DIRECT}, which is widely used in
                 multidisciplinary engineering design, biological
                 science, and physical science applications. The package
                 includes both a serial code and a data-distributed
                 massively parallel code for different problem scales
                 and optimization (exploration vs. exploitation) goals.
                 Dynamic data structures are used to organize local
                 data, handle unpredictable memory requirements, reduce
                 the memory usage, and share the data across multiple
                 processors. The parallel code employs a multilevel
                 functional and data parallelism to boost concurrency
                 and mitigate the data dependency, thus improving the
                 load balancing and scalability. In addition,
                 checkpointing features are integrated into both
                 versions to provide fault tolerance and hot restarts.
                 Important algorithm modifications and design
                 considerations are discussed regarding data structures,
                 parallel schemes, error handling, and portability.
                 Using several benchmark functions and real-world
                 applications, the software is evaluated on different
                 systems in terms of optimization effectiveness, data
                 structure efficiency, parallel performance, and
                 checkpointing overhead. The package organization and
                 usage are also described in detail.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
  keywords =     "checkpointing; data structures; DIRECT; global
                 optimization; parallel schemes",
}
@Article{Hilbrich:2009:MCC,
  author =       "Tobias Hilbrich and Matthias S. M{\"u}ller and Bettina
                 Krammer",
  title =        "{MPI} Correctness Checking for {OpenMP\slash MPI}
                 Applications",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "37",
  number =       "3",
  pages =        "277--291",
  month =        jun,
  year =         "2009",
  CODEN =        "IJPPE5",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Sep 1 16:06:47 MDT 2010",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=3;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=3&spage=277",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Hong:2009:AMG,
  author =       "Sunpyo Hong and Hyesoon Kim",
  title =        "An analytical model for a {GPU} architecture with
                 memory-level and thread-level parallelism awareness",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "37",
  number =       "3",
  pages =        "152--163",
  month =        jun,
  year =         "2009",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/1555815.1555775",
  ISSN =         "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Aug 11 18:12:55 MDT 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "GPU architectures are increasingly important in the
                 multi-core era due to their high number of parallel
                 processors. Programming thousands of massively parallel
                 threads is a big challenge for software engineers, but
                 understanding the performance bottlenecks of those
                 parallel programs on GPU architectures to improve
                 application performance is even more difficult. Current
                 approaches rely on programmers to tune their
                 applications by exploiting the design space
                 exhaustively without fully understanding the
                 performance characteristics of their
                 applications.\par
                 To provide insights into the performance bottlenecks of
                 parallel applications on GPU architectures, we propose
                 a simple analytical model that estimates the execution
                 time of massively parallel programs. The key component
                 of our model is estimating the number of parallel
                 memory requests (we call this the memory warp
                 parallelism) by considering the number of running
                 threads and memory bandwidth. Based on the degree of
                 memory warp parallelism, the model estimates the cost
                 of memory requests, thereby estimating the overall
                 execution time of a program. Comparisons between the
                 outcome of the model and the actual execution time in
                 several GPUs show that the geometric mean of absolute
                 error of our model on micro-benchmarks is 5.4\% and on
                 GPU computing applications is 13.3\%. All the
                 applications are written in the CUDA programming
                 language.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  keywords =     "analytical model; CUDA; GPU architecture; memory level
                 parallelism; performance estimation; warp level
                 parallelism",
}
@Article{Huang:2009:EGO,
  author =       "Lei Huang and Deepak Eachempati and Marcus W. Hervey
                 and Barbara Chapman",
  title =        "Exploiting global optimizations for {OpenMP} programs
                 in the {OpenUH} compiler",
  journal =      j-SIGPLAN,
  volume =       "44",
  number =       "4",
  pages =        "289--290",
  month =        apr,
  year =         "2009",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/1504176.1504219",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Oct 9 08:40:49 MDT 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "The advent of new parallel architectures has increased
                 the need for parallel optimizing compilers to assist
                 developers in creating efficient code. OpenUH is a
                 state-of-the-art optimizing compiler, but it only
                 performs a limited set of optimizations for OpenMP
                 programs due to its conservative assumptions of shared
                 memory programming. These limitations may prevent some
                 OpenMP applications from being fully optimized to the
                 extent of its sequential counterpart. This paper
                 describes our design and implementation of a parallel
                 data flow framework, consisting of a Parallel Control
                 Flow Graph (PCFG) and a Parallel SSA (PSSA)
                 representation in OpenUH, to model data flow for OpenMP
                 programs. This framework enables the OpenUH compiler to
                 perform all classical scalar optimizations for OpenMP
                 programs, in addition to conducting OpenMP specific
                 optimizations.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIG{\-}PLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords =     "compiler analysis; OpenMP; Parallel SSA",
}
@Article{Kainz:2009:RCM,
  author =       "Bernhard Kainz and Markus Grabner and Alexander Bornik
                 and Stefan Hauswiesner and Judith Muehl and Dieter
                 Schmalstieg",
  title =        "Ray casting of multiple volumetric datasets with
                 polyhedral boundaries on manycore {GPUs}",
  journal =      j-TOG,
  volume =       "28",
  number =       "5",
  pages =        "152:1--152:9",
  month =        dec,
  year =         "2009",
  CODEN =        "ATGRDF",
  DOI =          "https://doi.org/10.1145/1618452.1618498",
  ISSN =         "0730-0301 (print), 1557-7368 (electronic)",
  ISSN-L =       "0730-0301",
  bibdate =      "Mon Mar 15 09:01:55 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tog/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tog.bib",
  abstract =     "We present a new GPU-based rendering system for ray
                 casting of multiple volumes. Our approach supports a
                 large number of volumes, complex translucent and
                 concave polyhedral objects as well as CSG intersections
                 of volumes and geometry in any combination. The system
                 (including the rasterization stage) is implemented
                 entirely in CUDA, which allows full control of the
                 memory hierarchy, in particular access to high
                 bandwidth and low latency shared memory. High depth
                 complexity, which is problematic for conventional
                 approaches based on depth peeling, can be handled
                 successfully. As far as we know, our approach is the
                 first framework for multivolume rendering which
                 provides interactive frame rates when concurrently
                 rendering more than 50 arbitrarily overlapping volumes
                 on current graphics hardware.",
  acknowledgement = ack-nhfb,
  articleno =    "152",
  fjournal =     "ACM Transactions on Graphics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J778",
}
@Article{Klemm:2009:RTM,
  author =       "Michael Klemm and Matthias Bezold and Stefan Gabriel
                 and Ronald Veldema and Michael Philippsen",
  title =        "Reparallelization techniques for migrating {OpenMP}
                 codes in computational grids",
  journal =      j-CCPE,
  volume =       "21",
  number =       "3",
  pages =        "281--299",
  day =          "10",
  month =        mar,
  year =         "2009",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.1356",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Dec 5 10:08:30 MST 2011",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626;
                 http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Prac\-tice and
                 Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "6 Aug 2008",
}
@InProceedings{Klimach:2009:PCH,
  author =       "Harald Klimach and Sabine P. Roller",
  title =        "Parallel Coupling of Heterogeneous Domains with
                 {KOP3D} using {PACX-MPI}",
  crossref =     "Tuncer:2009:PCF",
  volume =       "67",
  pages =        "339--345",
  year =         "2009",
  DOI =          "https://doi.org/10.1007/978-3-540-92744-0_42",
  bibdate =      "Sat Dec 22 08:34:16 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncse.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/content/pdf/10.1007/978-3-540-92744-0_42",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-540-92744-0",
  book-URL =     "http://www.springerlink.com/content/978-3-540-92744-0",
}
@Article{Komatitsch:2009:PHO,
  author =       "Dimitri Komatitsch and David Mich{\'e}a and Gordon
                 Erlebacher",
  title =        "Porting a high-order finite-element earthquake
                 modeling application to {NVIDIA} graphics cards using
                 {CUDA}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "69",
  number =       "5",
  pages =        "451--460",
  month =        may,
  year =         "2009",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Wed Sep 1 17:08:39 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@InProceedings{Langdon:2009:FHQ,
  author =       "W. B. Langdon",
  editor =       "Franz Rothlauf",
  booktitle =    "{GECCO '09 Proceedings of the 11th Annual Conference
                 Companion on Genetic and Evolutionary Computation
                 Conference: Late Breaking Papers}",
  title =        "A fast high quality pseudo random number generator for
                 {nVidia CUDA}",
  publisher =    pub-ACM,
  address =      pub-ACM:adr,
  pages =        "2511--2513",
  year =         "2009",
  DOI =          "https://doi.org/10.1145/1570256.1570353",
  ISBN =         "1-60558-505-X",
  ISBN-13 =      "978-1-60558-505-5",
  LCCN =         "????",
  bibdate =      "Fri Jan 06 09:34:05 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/prng.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.cs.ucl.ac.uk/staff/W.Langdon/ftp/gp-code/random-numbers/cuda_park-miller.tar.gz",
  acknowledgement = ack-nhfb,
  keywords =     "GGL generator (LCG(16 807, 0, $2^{31} - 1$))",
  meetingname =  "Proceedings of the 11th annual Conference Companion on
                 Genetic and Evolutionary Computation: July 8--12, 2009,
                 Montreal, Quebec, Canada",
}
@Article{Lee:2009:OGC,
  author =       "Seyong Lee and Seung-Jai Min and Rudolf Eigenmann",
  title =        "{OpenMP} to {GPGPU}: a compiler framework for
                 automatic translation and optimization",
  journal =      j-SIGPLAN,
  volume =       "44",
  number =       "4",
  pages =        "101--110",
  month =        apr,
  year =         "2009",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/1504176.1504194",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Oct 9 08:40:49 MDT 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "GPGPUs have recently emerged as powerful vehicles for
                 general-purpose high-performance computing. Although a
                 new Compute Unified Device Architecture (CUDA)
                 programming model from NVIDIA offers improved
                 programmability for general computing, programming
                 GPGPUs is still complex and error-prone. This paper
                 presents a compiler framework for automatic
                 source-to-source translation of standard OpenMP
                 applications into CUDA-based GPGPU applications. The
                 goal of this translation is to further improve
                 programmability and make existing OpenMP applications
                 amenable to execution on GPGPUs. In this paper, we have
                 identified several key transformation techniques, which
                 enable efficient GPU global memory access, to achieve
                 high performance. Experimental results from two
                 important kernels (JACOBI and SPMUL) and two NAS OpenMP
                 Parallel Benchmarks (EP and CG) show that the described
                 translator and compile-time optimizations work well on
                 both regular and irregular applications, leading to
                 performance improvements of up to 50X over the
                 unoptimized translation (up to 328X over serial).",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIG{\-}PLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords =     "automatic translation; compiler optimization; CUDA;
                 GPU; OpenMP",
}
@Article{Ma:2009:CRS,
  author =       "Wenjing Ma and Gagan Agrawal",
  title =        "A compiler and runtime system for enabling data mining
                 applications on {GPUs}",
  journal =      j-SIGPLAN,
  volume =       "44",
  number =       "4",
  pages =        "287--288",
  month =        apr,
  year =         "2009",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/1594835.1504218",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Oct 9 08:40:49 MDT 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2000.bib",
  abstract =     "With increasing need for accelerating data mining and
                 scientific data analysis on large data sets, and less
                 chance to improve processor performance by simply
                 increasing clock frequencies, multi-core architectures
                 and accelerators like FPGAs and GPUs have become
                 popular. A recent development in using GPU for general
                 computing has been the release of CUDA (Compute Unified
                 Device Architecture) by NVIDIA. CUDA allows GPU
                 programming with C-language-like features, thus easing
                 the development of non-graphics applications on a GPU.
                 However, several challenges still remain in programming
                 the GPUs with CUDA, because CUDA involves explicit
                 parallel programming and management of its complex
                 memory hierarchy, as well as allocating device memory,
                 moving data between CPU and device memory, and
                 specification of thread grid configurations.\par
                 In this paper, we offer a solution for the programmers
                 to generate CUDA code by specifying the sequential
                 reduction loop(s) with some information about the
                 parameters. With program analysis and code generation,
                 the applications are mapped to a GPU. Several
                 additional optimizations are also performed by the
                 middleware.\par
                 We have evaluated our system using three popular data
                 mining applications, k-means clustering, EM clustering,
                 and Principal Component Analysis (PCA). The speedup
                 that each of these applications achieve over a
                 sequential CPU version ranges between 20 and 50.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIG{\-}PLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords =     "CUDA; data mining; GPGPU",
}
@Article{Marowka:2009:BCT,
  author =       "Ami Marowka",
  title =        "{BSP2OMP}: a Compiler For Translating {BSP} Programs
                 To {OpenMP}",
  journal =      j-INT-J-PAR-EMER-DIST-SYS,
  volume =       "24",
  number =       "4",
  pages =        "293--310",
  year =         "2009",
  CODEN =        "????",
  ISSN =         "1744-5760 (print), 1744-5779 (electronic)",
  ISSN-L =       "1744-5760",
  bibdate =      "Thu Sep 2 08:12:37 MDT 2010",
  bibsource =    "http://www.informaworld.com/smpp/title~content=t713729127~link=cover;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel, Emergent and
                 Distributed Systems",
  journal-URL =  "http://www.tandfonline.com/loi/gpaa20",
  keywords =     "BSP; BSP2OMP; EPCC; multicore; OpenMP",
}
@Article{Miguel-Alonso:2009:INS,
  author =       "J. Miguel-Alonso and J. Navaridas and F. J. Ridruejo",
  title =        "Interconnection Network Simulation Using Traces of
                 {MPI} Applications",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "37",
  number =       "2",
  pages =        "153--174",
  month =        apr,
  year =         "2009",
  CODEN =        "IJPPE5",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Sep 1 16:06:47 MDT 2010",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=2;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=2&spage=153",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Ozgun:2009:PCB,
  author =       "Ozlem Ozgun and Raj Mittra and Mustafa
                 Kuzuo{\u{g}}lu",
  title =        "Parallelized Characteristic Basis Finite Element
                 Method ({CBFEM--MPI}) --- a non-iterative domain
                 decomposition algorithm for electromagnetic scattering
                 problems",
  journal =      j-J-COMPUT-PHYS,
  volume =       "228",
  number =       "6",
  pages =        "2225--2238",
  day =          "1",
  month =        apr,
  year =         "2009",
  CODEN =        "JCTPAH",
  DOI =          "https://doi.org/10.1016/j.jcp.2008.12.002",
  ISSN =         "0021-9991 (print), 1090-2716 (electronic)",
  ISSN-L =       "0021-9991",
  bibdate =      "Mon Jan 2 22:14:07 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jcomputphys2000.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0021999108006293",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Physics",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00219991",
}
@Article{Rashti:2009:SAM,
  author =       "Mohammad J. Rashti and Ahmad Afsahi",
  title =        "A Speculative and Adaptive {MPI} Rendezvous Protocol
                 Over {RDMA}-enabled Interconnects",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "37",
  number =       "2",
  pages =        "223--246",
  month =        apr,
  year =         "2009",
  CODEN =        "IJPPE5",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Sep 1 16:06:47 MDT 2010",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=37&issue=2;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=37&issue=2&spage=223",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Schneider:2009:CPM,
  author =       "Scott Schneider and Jae-Seung Yeom and Benjamin Rose
                 and John C. Linford and Adrian Sandu and Dimitrios S.
                 Nikolopoulos",
  title =        "A comparison of programming models for multiprocessors
                 with explicitly managed memory hierarchies",
  journal =      j-SIGPLAN,
  volume =       "44",
  number =       "4",
  pages =        "131--140",
  month =        apr,
  year =         "2009",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/1594835.1504197",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Oct 9 08:40:49 MDT 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "On multiprocessors with explicitly managed memory
                 hierarchies (EMM), software has the responsibility of
                 moving data in and out of fast local memories. This
                 task can be complex and error-prone even for expert
                 programmers. Before we can allow compilers to handle
                 this complexity for us, we must identify the
                 abstractions that are general enough to allow us to
                 write applications with reasonable effort, yet specific
                 enough to exploit the vast on-chip memory bandwidth of
                 EMM multi-processors. To this end, we compare two
                 programming models against hand-tuned codes on the STI
                 Cell, paying attention to programmability and
                 performance. The first programming model, Sequoia,
                 abstracts the memory hierarchy as private address
                 spaces, each corresponding to a parallel task. The
                 second, Cellgen, is a new framework which provides
                 OpenMP-like semantics and the abstraction of a shared
                 address space divided into private and shared data. We
                 compare three applications programmed using these
                 models against their hand-optimized counterparts in
                 terms of abstractions, programming complexity, and
                 performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIG{\-}PLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords =     "Cell BE; explicitly managed memory hierarchies;
                 programming models",
}
@Article{Schwarz:2009:GFG,
  author =       "Michael Schwarz and Marc Stamminger",
  title =        "{GPU}: Fast {GPU}-based Adaptive Tessellation with
                 {CUDA}",
  journal =      j-CGF,
  volume =       "28",
  number =       "2",
  pages =        "365--374",
  month =        apr,
  year =         "2009",
  CODEN =        "CGFODY",
  DOI =          "https://doi.org/10.1111/j.1467-8659.2009.01376.x",
  ISSN =         "0167-7055 (print), 1467-8659 (electronic)",
  ISSN-L =       "0167-7055",
  bibdate =      "Sat May 11 13:27:16 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/cgf.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Com{\-}pu{\-}ter Graphics Forum",
  journal-URL =  "http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1467-8659/",
  onlinedate =   "27 Mar 2009",
}
@Article{Tabakin:2009:QPE,
  author =       "Frank Tabakin and Bruno Juli{\'a}-D{\'\i}az",
  title =        "{QCMPI}: a parallel environment for quantum
                 computing",
  journal =      j-COMP-PHYS-COMM,
  volume =       "180",
  number =       "6",
  pages =        "948--964",
  month =        jun,
  year =         "2009",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2008.11.021",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Mon Feb 13 23:42:42 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2000.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465508004141",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Tallent:2009:EPM,
  author =       "Nathan R. Tallent and John M. Mellor-Crummey",
  title =        "Effective performance measurement and analysis of
                 multithreaded applications",
  journal =      j-SIGPLAN,
  volume =       "44",
  number =       "4",
  pages =        "229--240",
  month =        apr,
  year =         "2009",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/1504176.1504210",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Oct 9 08:40:49 MDT 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Understanding why the performance of a multithreaded
                 program does not improve linearly with the number of
                 cores in a shared-memory node populated with one or
                 more multicore processors is a problem of growing
                 practical importance. This paper makes three
                 contributions to performance analysis of multithreaded
                 programs. First, we describe how to measure and
                 attribute {\em parallel idleness}, namely, where
                 threads are stalled and unable to work. This technique
                 applies broadly to programming models ranging from
                 explicit threading ({\em e.g.}, Pthreads) to
                 higher-level models such as Cilk and OpenMP. Second, we
                 describe how to measure and attribute {\em parallel
                 overhead\/} --- when a thread is performing
                 miscellaneous work other than executing the user's
                 computation. By employing a combination of compiler
                 support and post-mortem analysis, we incur no
                 measurement cost beyond normal profiling to glean this
                 information. Using {\em idleness\/} and {\em
                 overhead\/} metrics enables one to pinpoint areas of an
                 application where concurrency should be increased (to
                 reduce idleness), decreased (to reduce overhead), or
                 where the present parallelization is hopeless (where
                 idleness and overhead are both high). Third, we
                 describe how to measure and attribute arbitrary
                 performance metrics for high-level multithreaded
                 programming models, such as Cilk. This requires
                 bridging the gap between the expression of logical
                 concurrency in programs and its realization at run-time
                 as it is adaptively partitioned and scheduled onto a
                 pool of threads. We have prototyped these ideas in the
                 context of Rice University's HPCToolkit performance
                 tools. We describe our approach, implementation, and
                 experiences applying this approach to measure and
                 attribute work, idleness, and overhead in executions of
                 Cilk programs.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIG{\-}PLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords =     "call path profiling; HPCToolkit; multithreaded
                 programming models; performance analysis",
}
@Article{Thakur:2009:TSE,
  author =       "Rajeev Thakur and William Gropp",
  title =        "Test suite for evaluating performance of multithreaded
                 {MPI} communication",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "35",
  number =       "12",
  pages =        "608--617",
  month =        dec,
  year =         "2009",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Thu Sep 2 17:51:11 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Tournavitis:2009:THA,
  author =       "Georgios Tournavitis and Zheng Wang and Bj{\"o}rn
                 Franke and Michael F. P. O'Boyle",
  title =        "Towards a holistic approach to auto-parallelization:
                 integrating profile-driven parallelism detection and
                 machine-learning based mapping",
  journal =      j-SIGPLAN,
  volume =       "44",
  number =       "6",
  pages =        "177--187",
  month =        jun,
  year =         "2009",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/1542476.1542496",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Jun 16 14:41:16 MDT 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Compiler-based auto-parallelization is a much studied
                 area, yet has still not found wide-spread application.
                 This is largely due to the poor exploitation of
                 application parallelism, subsequently resulting in
                 performance levels far below those which a skilled
                 expert programmer could achieve. We have identified two
                 weaknesses in traditional parallelizing compilers and
                 propose a novel, integrated approach, resulting in
                 significant performance improvements of the generated
                 parallel code. Using profile-driven parallelism
                 detection we overcome the limitations of static
                 analysis, enabling us to identify more application
                 parallelism and only rely on the user for final
                 approval. In addition, we replace the traditional
                 target-specific and inflexible mapping heuristics with
                 a machine-learning based prediction mechanism,
                 resulting in better mapping decisions while providing
                 more scope for adaptation to different target
                 architectures. We have evaluated our parallelization
                 strategy against the NAS and SPEC OMP benchmarks and
                 two different multi-core platforms (dual quad-core
                 Intel Xeon SMP and dual-socket QS20 Cell blade). We
                 demonstrate that our approach not only yields
                 significant improvements when compared with
                 state-of-the-art parallelizing compilers, but comes
                 close to and sometimes exceeds the performance of
                 manually parallelized codes. On average, our
                 methodology achieves 96\% of the performance of the
                 hand-tuned OpenMP NAS and SPEC parallel benchmarks on
                 the Intel Xeon platform and gains a significant speedup
                 for the IBM Cell platform, demonstrating the potential
                 of profile-guided and machine-learning based
                 parallelization for complex multi-core platforms.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIG{\-}PLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords =     "auto-parallelization; machine-learning based
                 parallelism mapping; OpenMP; profile-driven parallelism
                 detection",
}
@Article{Udupa:2009:SES,
  author =       "Abhishek Udupa and R. Govindarajan and Matthew J.
                 Thazhuthaveetil",
  title =        "Synergistic execution of stream programs on multicores
                 with accelerators",
  journal =      j-SIGPLAN,
  volume =       "44",
  number =       "7",
  pages =        "99--108",
  month =        jul,
  year =         "2009",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/1542452.1542466",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Jun 26 12:07:39 MDT 2009",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2000.bib",
  abstract =     "The StreamIt programming model has been proposed to
                 exploit parallelism in streaming applications on
                 general purpose multicore architectures. The StreamIt
                 graphs describe task, data and pipeline parallelism
                 which can be exploited on accelerators such as Graphics
                 Processing Units (GPUs) or CellBE which support
                 abundant parallelism in hardware.\par
                 In this paper, we describe a novel method to
                 orchestrate the execution of a StreamIt program on a
                 multicore platform equipped with an accelerator. The
                 proposed approach identifies, using profiling, the
                 relative benefits of executing a task on the
                 superscalar CPU cores and the accelerator. We formulate
                 the problem of partitioning the work between the CPU
                 cores and the GPU, taking into account the latencies
                 for data transfers and the required buffer layout
                 transformations associated with the partitioning, as an
                 integrated Integer Linear Program (ILP) which can then
                 be solved by an ILP solver. We also propose an
                 efficient heuristic algorithm for the work partitioning
                 between the CPU and the GPU, which provides solutions
                 which are within 9.05\% of the optimal solution on an
                 average across the benchmark suite. The partitioned
                 tasks are then software pipelined to execute on the
                 multiple CPU cores and the Streaming Multiprocessors
                 (SMs) of the GPU. The software pipelining algorithm
                 orchestrates the execution between CPU cores and the
                 GPU by emitting the code for the CPU and the GPU, and
                 the code for the required data transfers. Our
                 experiments on a platform with 8 CPU cores and a
                 GeForce 8800 GTS 512 GPU show a geometric mean speedup
                 of 6.84X with a maximum of 51.96X over a single
                 threaded CPU execution across the StreamIt benchmarks.
                 This is a 18.9\% improvement over a partitioning
                 strategy that maps only the filters that cannot be
                 executed on the GPU --- the filters with state that is
                 persistent across firings --- onto the CPU.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIG{\-}PLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords =     "CUDA; GPU programming; partitioning; software
                 pipelining; stream programming",
}
@Article{Vo:2009:FVP,
author = "Anh Vo and Sarvani Vakkalanka and Michael DeLisi and
Ganesh Gopalakrishnan and Robert M. Kirby and Rajeev
Thakur",
title = "Formal verification of practical {MPI} programs",
journal = j-SIGPLAN,
volume = "44",
number = "4",
pages = "261--270",
month = apr,
year = "2009",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1594835.1504214",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 9 08:40:49 MDT 2009",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "This paper considers the problem of formal
verification of MPI programs operating under a fixed
test harness for safety properties without building
verification models. In our approach, we directly
model-check the MPI/C source code, executing its
interleavings with the help of a verification
scheduler. Unfortunately, the total feasible number of
interleavings is exponential, and impractical to
examine even for our modest goals. Our earlier
publications formalized and implemented a partial order
reduction approach that avoided exploring equivalent
interleavings, and presented a verification tool called
ISP. This paper presents algorithmic and engineering
innovations to ISP, including the use of OpenMP
parallelization, that now enables it to handle
practical MPI programs, including: (i) ParMETIS --- a
widely used hypergraph partitioner, and (ii) MADRE ---
a Memory Aware Data Re-distribution Engine, both
developed outside our group. Over these benchmarks, ISP
has automatically verified up to 14K lines of MPI/C
code, producing error traces of deadlocks and assertion
violations within seconds.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "distributed programming; dynamic partial order
reduction; message passing interface; model checking;
MPI",
}
@Article{Walters:2009:RBF,
author = "John Paul Walters and Vipin Chaudhary",
title = "Replication-Based Fault Tolerance for {MPI}
Applications",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "20",
number = "7",
pages = "997--1010",
month = jul,
year = "2009",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2008.172",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Thu May 13 12:06:56 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@Article{Wang:2009:MPM,
author = "Zheng Wang and Michael F. P. O'Boyle",
title = "Mapping parallelism to multi-cores: a machine learning
based approach",
journal = j-SIGPLAN,
volume = "44",
number = "4",
pages = "75--84",
month = apr,
year = "2009",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1504176.1504189",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 9 08:40:49 MDT 2009",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "The efficient mapping of program parallelism to
multi-core processors is highly dependent on the
underlying architecture. This paper proposes a portable
and automatic compiler-based approach to mapping such
parallelism using machine learning. It develops two
predictors: a data sensitive and a data insensitive
predictor to select the best mapping for parallel
programs. They predict the number of threads and the
scheduling policy for any given program using a model
learnt off-line. By using low-cost profiling runs, they
predict the mapping for a new unseen program across
multiple input data sets. We evaluate our approach by
selecting parallelism mapping configurations for OpenMP
programs on two representative but different multi-core
platforms (the Intel Xeon and the Cell processors).
Performance of our technique is stable across programs
and architectures. On average, it delivers above 96\%
performance of the maximum available on both platforms.
It achieves, on average, a 37\% (up to 17.5 {\em
times\/}) performance improvement over the OpenMP
runtime default scheme on the Cell platform. Compared
to two recent prediction models, our predictors achieve
better performance with a significantly lower profiling
cost.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "artificial neural networks; compiler optimization;
machine learning; performance modeling; support vector
machine",
}
@Article{Xue:2009:MSR,
author = "Ruini Xue and Xuezheng Liu and Ming Wu and Zhenyu Guo
and Wenguang Chen and Weimin Zheng and Zheng Zhang and
Geoffrey Voelker",
title = "{MPIWiz}: subgroup reproducible replay of {MPI}
applications",
journal = j-SIGPLAN,
volume = "44",
number = "4",
pages = "251--260",
month = apr,
year = "2009",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1504176.1504213",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Oct 9 08:40:49 MDT 2009",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "Message Passing Interface (MPI) is a widely used
standard for managing coarse-grained concurrency on
distributed computers. Debugging parallel MPI
applications, however, has always been a particularly
challenging task due to their high degree of concurrent
execution and non-deterministic behavior. Deterministic
replay is a potentially powerful technique for
addressing these challenges, with existing MPI replay
tools adopting either data-replay or order-replay
approaches. Unfortunately, each approach has its
tradeoffs. Data-replay generates substantial log sizes
by recording every communication message. Order-replay
generates small logs, but requires all processes to be
replayed together. We believe that these drawbacks are
the primary reasons that inhibit the wide adoption of
deterministic replay as the critical enabler of cyclic
debugging of MPI applications.\par
This paper describes {\em subgroup reproducible
replay\/} (SRR), a hybrid deterministic replay method
that provides the benefits of both data-replay and
order-replay while balancing their trade-offs. SRR
divides all processes into disjoint groups. It records
the contents of messages crossing group boundaries as
in data-replay, but records just message orderings for
communication within a group as in order-replay. In
this way, SRR can exploit the communication locality of
traffic patterns in MPI applications. During replay,
developers can then replay each group individually. SRR
reduces recording overhead by not recording intra-group
communication, and reduces replay overhead by limiting
the size of each replay group. Exposing these tradeoffs
gives the user the necessary control for making
deterministic replay practical for MPI
applications.\par
We have implemented a prototype, MPIWiz, to demonstrate
and evaluate SRR. MPIWiz employs a replay framework
that allows transparent binary instrumentation of both
library and system calls. As a result, MPIWiz replays
MPI applications with no source code modification and
relinking, and handles non-determinism in both MPI and
OS system calls. Our preliminary results show that
MPIWiz can reduce recording overhead by over a factor
of four relative to data-replay, yet without requiring
the entire application to be replayed as in
order-replay. Recording increases execution time by
27\% while the application can be replayed in just 53\%
of its base execution time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "distributed debugging; message passing interface;
non-determinism; record and replay",
}
@Article{Yang:2009:DBM,
author = "Chao-Tung Yang and Kuan-Chou Lai",
title = "A directive-based {MPI} code generator for {Linux PC}
clusters",
journal = j-J-SUPERCOMPUTING,
volume = "50",
number = "2",
pages = "177--207",
month = nov,
year = "2009",
CODEN = "JOSUED",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Wed Aug 25 08:38:43 MDT 2010",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=50&issue=2;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=50&issue=2&spage=177",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@InProceedings{Yilmaz:2009:HPC,
author = "E. Yilmaz and R. U. Payli and H. U. Akay and A. Ecer",
title = "Hybrid Parallelism for {CFD} Simulations: Combining
{MPI} with {OpenMP}",
crossref = "Tuncer:2009:PCF",
volume = "67",
pages = "401--408",
year = "2009",
DOI = "https://doi.org/10.1007/978-3-540-92744-0_50",
bibdate = "Sat Dec 22 08:34:16 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncse.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/content/pdf/10.1007/978-3-540-92744-0_50",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-540-92744-0",
book-URL = "http://www.springerlink.com/content/978-3-540-92744-0",
}
@Article{Ayguade:2010:EOS,
author = "Eduard Ayguad{\'e} and Rosa M. Badia and Pieter
Bellens and Daniel Cabrera and Alejandro Duran and Roger
Ferrer and Marc Gonz{\'a}lez and Francisco Igual and
Daniel Jim{\'e}nez-Gonz{\'a}lez and Jes{\'u}s Labarta
and Luis Martinell and Xavier Martorell and Rafael Mayo
and Josep M. P{\'e}rez and Judit Planas and Enrique S.
Quintana-Ort{\'\i}",
title = "Extending {OpenMP} to Survive the Heterogeneous
Multi-Core Era",
journal = j-INT-J-PARALLEL-PROG,
volume = "38",
number = "5--6",
pages = "440--459",
month = oct,
year = "2010",
CODEN = "IJPPE5",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Wed Sep 1 16:06:49 MDT 2010",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=38&issue=5;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=38&issue=5&spage=440",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Baghsorkhi:2010:APM,
author = "Sara S. Baghsorkhi and Matthieu Delahaye and Sanjay J.
Patel and William D. Gropp and Wen-mei W. Hwu",
title = "An adaptive performance modeling tool for {GPU}
architectures",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "105--114",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693470",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper presents an analytical model to predict the
performance of
general-purpose applications on a GPU architecture. The
model is designed to provide performance information to
an auto-tuning compiler and assist it in narrowing down
the search to the more promising implementations. It
can also be incorporated into a tool to help
programmers better assess the performance bottlenecks
in their code. We analyze each GPU kernel and identify
how the kernel exercises major GPU microarchitecture
features. To identify the performance bottlenecks
accurately, we introduce an abstract interpretation of
a GPU kernel, {\em work flow graph}, based on which we
estimate the execution time of a GPU kernel. We
validated our performance model on the NVIDIA GPUs
using CUDA (Compute Unified Device Architecture). For
this purpose, we used data parallel benchmarks that
stress different GPU microarchitecture events such as
uncoalesced memory accesses, scratch-pad memory bank
conflicts, and control flow divergence, which must be
accurately modeled but represent challenges to the
analytical performance models. The proposed model
captures full system complexity and shows high accuracy
in predicting the performance trends of different
optimized kernel implementations. We also describe our
approach to extracting the performance model
automatically from a kernel code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "analytical model; GPU; parallel programming;
performance estimation",
}
@Article{Balaji:2010:FGM,
author = "Pavan Balaji and Darius Buntinas and David Goodell and
William Gropp and Rajeev Thakur",
title = "Fine-Grained Multithreading Support for Hybrid
Threaded {MPI} Programming",
journal = j-IJHPCA,
volume = "24",
number = "1",
pages = "49--57",
month = feb,
year = "2010",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342009360206",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/24/1.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/24/1/49.full.pdf+html",
acknowledgement = ack-nhfb,
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Balaji:2010:IND,
author = "Pavan Balaji and Anthony Chan and William Gropp and
Rajeev Thakur and Ewing Lusk",
title = "The Importance of Non-Data-Communication Overheads in
{MPI}",
journal = j-IJHPCA,
volume = "24",
number = "1",
pages = "5--15",
month = feb,
year = "2010",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342009359258",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/24/1.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/24/1/5.full.pdf+html",
acknowledgement = ack-nhfb,
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Blas:2010:IEF,
author = "Javier Garcia Blas and Florin Isaila and Jesus
Carretero and David Singh and Felix
Garcia-Carballeira",
title = "Implementation and Evaluation of File Write-Back and
Prefetching for {MPI-IO} Over {GPFS}",
journal = j-IJHPCA,
volume = "24",
number = "1",
pages = "78--92",
month = feb,
year = "2010",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342009359015",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/24/1.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/24/1/78.full.pdf+html",
acknowledgement = ack-nhfb,
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Brightwell:2010:EDA,
author = "Ron Brightwell",
title = "Exploiting Direct Access Shared Memory for {MPI} on
Multi-Core Processors",
journal = j-IJHPCA,
volume = "24",
number = "1",
pages = "69--77",
month = feb,
year = "2010",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342009359014",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:45 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/24/1.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/24/1/69.full.pdf+html",
acknowledgement = ack-nhfb,
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Broquedis:2010:FEO,
author = "Fran{\c{c}}ois Broquedis and Nathalie Furmento and
Brice Goglin and Pierre-Andr{\'e} Wacrenier and Raymond
Namyst",
title = "{ForestGOMP}: An Efficient {OpenMP} Environment for
{NUMA} Architectures",
journal = j-INT-J-PARALLEL-PROG,
volume = "38",
number = "5--6",
pages = "418--439",
month = oct,
year = "2010",
CODEN = "IJPPE5",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Wed Sep 1 16:06:49 MDT 2010",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=38&issue=5;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=38&issue=5&spage=418",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Bull:2010:PEM,
author = "J. Mark Bull and James Enright and Xu Guo and Chris
Maynard and Fiona Reid",
title = "Performance Evaluation of Mixed-Mode {OpenMP\slash
MPI} Implementations",
journal = j-INT-J-PARALLEL-PROG,
volume = "38",
number = "5--6",
pages = "396--417",
month = oct,
year = "2010",
CODEN = "IJPPE5",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Wed Sep 1 16:06:49 MDT 2010",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=38&issue=5;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=38&issue=5&spage=396",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Campanoni:2010:HFP,
author = "Simone Campanoni and Giovanni Agosta and Stefano
Crespi Reghizzi and Andrea Di Biagio",
title = "A highly flexible, parallel virtual machine: design
and experience of {ILDJIT}",
journal = j-SPE,
volume = "40",
number = "2",
pages = "177--211",
day = "??",
month = feb,
year = "2010",
CODEN = "SPEXBL",
DOI = "https://doi.org/10.1002/spe.950",
ISSN = "0038-0644 (print), 1097-024X (electronic)",
ISSN-L = "0038-0644",
bibdate = "Wed Mar 17 10:16:22 MDT 2010",
bibsource = "http://www.interscience.wiley.com/jpages/0038-0644;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
acknowledgement = ack-nhfb,
fjournal = "Soft{\-}ware\emdash Prac{\-}tice and Experience",
journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1097-024X",
onlinedate = "Jan 14 2010 4:49AM",
}
@Article{Cardoso:2010:MSO,
author = "M. C. Cardoso and F. M. Costa",
title = "{MPI} support on opportunistic grids based on the
{InteGrade} middleware",
journal = j-CCPE,
volume = "22",
number = "3",
pages = "343--357",
day = "10",
month = mar,
year = "2010",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.1479",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Mon Dec 5 10:08:41 MST 2011",
bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Prac\-tice and
Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "21 Sep 2009",
}
@Article{Carter:2010:PLN,
author = "John D. Carter and William B. Gardner and Gary
Grewal",
title = "The {Pilot} library for novice {MPI} programmers",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "351--352",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1837853.1693509",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "The Pilot library is a new method for programming
MPI-enabled clusters in C, targeted at novice parallel
programmers. Formal elements from Communicating
Sequential Processes (CSP) are used to realize a
process/channel model of parallel computation that
reduces opportunities for deadlock and other
communication errors. This simple model, plus an
application programming interface (API) styled after
C's formatted I/O, are designed to make the library
easy to learn. The Pilot library exists as a thin layer
on top of any standard Message Passing Interface (MPI)
implementation, preserving MPI's portability and
efficiency, with little performance overhead arising as
a result of Pilot's additional features.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "C; cluster programming; collective operations;
deadlock detection; high-performance computing; MPI",
}
@Article{Casas:2010:APD,
author = "Marc Casas and Rosa M. Badia and Jes{\'u}s Labarta",
title = "Automatic Phase Detection and Structure Extraction of
{MPI} Applications",
journal = j-IJHPCA,
volume = "24",
number = "3",
pages = "335--360",
month = aug,
year = "2010",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342009360039",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Aug 31 09:59:46 MDT 2010",
bibsource = "http://hpc.sagepub.com/content/24/3.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/24/3/335.full.pdf+html",
acknowledgement = ack-nhfb,
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Cheng:2010:BRBb,
author = "Jie Cheng",
title = "Book Review: {{\booktitle{CUDA by Example: An
Introduction to General-Purpose GPU Programming}}, by
Jason Sanders and Edward Kandrot, ISBN-13
978-0-13-138768-3}",
journal = j-SCPE,
volume = "11",
number = "4",
pages = "401--401",
month = dec,
year = "2010",
CODEN = "????",
ISSN = "1895-1767",
ISSN-L = "1895-1767",
bibdate = "Sat Nov 10 09:03:30 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/scpe.bib;
http://www.scpe.org/index.php/scpe/issue/view/91",
note = "See \cite{Sanders:2010:CEI}.",
URL = "http://www.scpe.org/index.php/scpe/article/view/663",
acknowledgement = ack-nhfb,
remark = "Special Issue: Network Management in Distributed
Systems.",
}
@Article{Cho:2010:OPP,
author = "S. M. Cho and D. W. Im and O. Y. Jang and H. J. Song
and B. D. Paulovicks and V. Sheinin and H. Yeo",
title = "{OpenCL} and parallel primitives for digital {TV}
applications",
journal = j-IBM-JRD,
volume = "54",
number = "5",
pages = "7:1--7:14",
month = "????",
year = "2010",
CODEN = "IBMJAE",
DOI = "https://doi.org/10.1147/JRD.2010.2062050",
ISSN = "0018-8646 (print), 2151-8556 (electronic)",
ISSN-L = "0018-8646",
bibdate = "Sun Feb 20 14:29:19 MST 2011",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ibmjrd.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.research.ibm.com/journal/",
acknowledgement = ack-nhfb,
fjournal = "IBM Journal of Research and Development",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520",
}
@Article{Chou:2010:CMI,
author = "Yu-Cheng Chou and Stephen S. Nestinger and Harry H.
Cheng",
title = "{Ch MPI}: Interpretive Parallel Computing in {C}",
journal = j-COMPUT-SCI-ENG,
volume = "12",
number = "2",
pages = "54--67",
month = mar # "\slash " # apr,
year = "2010",
CODEN = "CSENFA",
DOI = "https://doi.org/10.1109/MCSE.2010.36",
ISSN = "1521-9615 (print), 1558-366X (electronic)",
ISSN-L = "1521-9615",
bibdate = "Thu May 13 11:08:14 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Computing in Science and Engineering",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}
@Article{Dickens:2010:HPI,
author = "Phillip M. Dickens and Jeremy Logan",
title = "A high performance implementation of {MPI-IO} for a
{Lustre} file system environment",
journal = j-CCPE,
volume = "22",
number = "11",
pages = "1433--1449",
day = "10",
month = aug,
year = "2010",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.1491",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Mon Dec 5 10:08:46 MST 2011",
bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Prac\-tice and
Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "8 Sep 2009",
}
@TechReport{Du:2010:COT,
author = "Peng Du and Rick Weber and Piotr Luszczek and
Stanimire Tomov and Gregory Peterson and Jack
Dongarra",
title = "From {CUDA} to {OpenCL}: Towards a
Performance-portable Solution for Multi-platform {GPU}
Programming",
type = "LAPACK Working Note",
number = "228",
institution = inst-UTK-CS,
address = inst-UTK-CS:adr,
day = "6",
month = sep,
year = "2010",
bibdate = "Wed Aug 24 12:36:41 MDT 2011",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "UT-CS-10-656.",
URL = "http://www.netlib.org/lapack/lawnspdf/lawn228.pdf",
acknowledgement = ack-nhfb,
}
@Article{FerreiradaSilva:2010:PBC,
author = "Adelino {Ferreira da Silva}",
title = "\pkg{cudaBayesreg}: {Bayesian} Computation in {CUDA}",
journal = j-R-JOURNAL,
volume = "2",
number = "2",
pages = "48--55",
month = dec,
year = "2010",
CODEN = "????",
ISSN = "2073-4859",
bibdate = "Thu Aug 13 15:54:57 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/rjournal.bib",
URL = "http://journal.r-project.org/archive/2010-2/RJournal_2010-2_Ferreira~da-Silva.pdf",
acknowledgement = ack-r-project,
fjournal = "The R Journal",
journal-URL = "http://journal.r-project.org/",
}
@Article{Gelado:2010:ADS,
author = "Isaac Gelado and Javier Cabezas and Nacho Navarro and
John E. Stone and Sanjay Patel and Wen-mei W. Hwu",
title = "An asymmetric distributed shared memory model for
heterogeneous parallel systems",
journal = j-SIGPLAN,
volume = "45",
number = "3",
pages = "347--358",
month = mar,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1735970.1736059",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Mar 17 13:46:56 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/linux.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
http://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "Heterogeneous computing combines general purpose CPUs
with accelerators to efficiently execute both
sequential control-intensive and data-parallel phases
of applications. Existing programming models for
heterogeneous computing rely on programmers to
explicitly manage data transfers between the CPU system
memory and accelerator memory.\par
This paper presents a new programming model for
heterogeneous computing, called Asymmetric Distributed
Shared Memory (ADSM), that maintains a shared logical
memory space for CPUs to access objects in the
accelerator physical memory but not vice versa. The
asymmetry allows light-weight implementations that
avoid common pitfalls of symmetrical distributed shared
memory systems. ADSM allows programmers to assign data
objects to performance critical methods. When a method
is selected for accelerator execution, its associated
data objects are allocated within the shared logical
memory space, which is hosted in the accelerator
physical memory and transparently accessible by the
methods executed on CPUs.\par
We argue that ADSM reduces programming efforts for
heterogeneous computing systems and enhances
application portability. We present a software
implementation of ADSM, called GMAC, on top of CUDA in
a GNU/Linux environment. We show that applications
written in ADSM and running on top of GMAC achieve
performance comparable to their counterparts using
programmer-managed data transfers. This paper presents
the GMAC system and evaluates different design choices.
We further suggest additional architectural support
that will likely allow GMAC to achieve higher
application performance than the current CUDA model.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "asymmetric distributed shared memory; data-centric
programming models; heterogeneous systems",
}
@Article{Granat:2010:PSS,
author = "Robert Granat and Bo K{\aa}gstr{\"o}m",
title = "Parallel Solvers for {Sylvester}-Type Matrix Equations
with Applications in Condition Estimation, {Part I}:
Theory and Algorithms",
journal = j-TOMS,
volume = "37",
number = "3",
pages = "32:1--32:32",
month = sep,
year = "2010",
CODEN = "ACMSCU",
DOI = "https://doi.org/10.1145/1824801.1824810",
ISSN = "0098-3500 (print), 1557-7295 (electronic)",
ISSN-L = "0098-3500",
bibdate = "Mon Sep 27 10:15:50 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "Parallel ScaLAPACK-style algorithms for solving eight
common standard and generalized Sylvester-type matrix
equations and various sign and transposed variants are
presented. All algorithms are blocked variants based on
the Bartels--Stewart method and involve four major
steps: reduction to triangular form, updating the
right-hand side with respect to the reduction,
computing the solution to the reduced triangular
problem, and transforming the solution back to the
original coordinate system. Novel parallel algorithms
for solving reduced triangular matrix equations based
on wavefront-like traversal of the right-hand side
matrices are presented together with a generic
scalability analysis. These algorithms are used in
condition estimation and new robust parallel
sep$^{-1}$-estimators are developed. Experimental results
from three parallel platforms, including results from a
mixed OpenMP/MPI platform, are presented and analyzed
using several performance and accuracy metrics. The
analysis includes results regarding general and
triangular parallel solvers as well as parallel
condition estimators.",
acknowledgement = ack-nhfb,
articleno = "32",
fjournal = "ACM Transactions on Mathematical Software",
journal-URL = "http://dl.acm.org/pub.cfm?id=J782",
keywords = "condition estimation; Eigenvalue problems; library
software; Sylvester matrix equations",
}
@Article{Gutierrez:2010:QCS,
author = "Eladio Guti{\'e}rrez and Sergio Romero and Mar{\'\i}a
A. Trenas and Emilio L. Zapata",
title = "Quantum computer simulation using the {CUDA}
programming model",
journal = j-COMP-PHYS-COMM,
volume = "181",
number = "2",
pages = "283--300",
month = feb,
year = "2010",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/j.cpc.2009.09.021",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Sat Feb 11 09:54:27 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465509003117",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Hadjidoukas:2010:NOP,
author = "Panagiotis E. Hadjidoukas and Laurent Amsaleg",
title = "Nested {OpenMP} Parallelization of a Hierarchical Data
Clustering Algorithm",
journal = j-PARALLEL-PROCESS-LETT,
volume = "20",
number = "2",
pages = "187--208",
month = jun,
year = "2010",
CODEN = "PPLTEE",
DOI = "https://doi.org/10.1142/S0129626410000144",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
bibdate = "Thu Sep 2 09:08:12 MDT 2010",
bibsource = "http://ejournals.wspc.com.sg/ppl/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@Article{Hamid:2010:CMB,
author = "Nor Asilah Wati Abdul Hamid and Paul Coddington",
title = "Comparison of {MPI} Benchmark Programs on Shared
Memory and Distributed Memory Machines (Point-to-Point
Communication)",
journal = j-IJHPCA,
volume = "24",
number = "4",
pages = "469--483",
month = nov,
year = "2010",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342010371106",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Sep 6 15:14:35 MDT 2011",
bibsource = "http://hpc.sagepub.com/content/24/4.toc;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/24/4/469.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
onlinedate = "June 7, 2010",
}
@Article{Hawick:2010:PGC,
  author = "K. A. Hawick and A. Leist and D. P. Playne",
  title = "Parallel graph component labelling with {GPUs} and
      {CUDA}",
  journal = j-PARALLEL-COMPUTING,
  volume = "36",
  number = "12",
  pages = "655--678",
  month = dec,
  year = "2010",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Mon Nov 1 10:18:30 MDT 2010",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Hong:2010:IGP,
  author = "Sunpyo Hong and Hyesoon Kim",
  title = "An integrated {GPU} power and performance model",
  journal = j-COMP-ARCH-NEWS,
  volume = "38",
  number = "3",
  pages = "280--289",
  month = jun,
  year = "2010",
  CODEN = "CANED2",
  DOI = "https://doi.org/10.1145/1816038.1815998",
  ISSN = "0163-5964 (ACM), 0884-7495 (IEEE)",
  ISSN-L = "0163-5964",
  bibdate = "Tue Jul 6 14:11:46 MDT 2010",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
      http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract = "GPU architectures are increasingly important in the
      multi-core era due to their high number of parallel
      processors. Performance optimization for multi-core
      processors has been a challenge for programmers.
      Furthermore, optimizing for power consumption is even
      more difficult. Unfortunately, as a result of the high
      number of processors, the power consumption of
      many-core processors such as GPUs has increased
      significantly.\par
      Hence, in this paper, we propose an integrated power
      and performance (IPP) prediction model for a GPU
      architecture to predict the optimal number of active
      processors for a given application. The basic intuition
      is that when an application reaches the peak memory
      bandwidth, using more cores does not result in
      performance improvement.\par
      We develop an empirical power model for the GPU. Unlike
      most previous models, which require measured execution
      times, hardware performance counters, or architectural
      simulations, IPP predicts execution times to calculate
      dynamic power events. We then use the outcome of IPP to
      control the number of running cores. We also model the
      increases in power consumption that resulted from the
      increases in temperature.\par
      With the predicted optimal number of active cores, we
      show that we can save up to 22.09\% of runtime GPU
      energy consumption and on average 10.99\% of that for
      the five memory bandwidth-limited benchmarks.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGARCH Computer Architecture News",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
  keywords = "analytical model; CUDA; energy; GPU architecture;
      performance; power estimation",
}
@Article{Huang:2010:ELA,
  author = "Lei Huang and Haoqiang Jin and Liqi Yi and Barbara
      Chapman",
  title = "Enabling locality-aware computations in {OpenMP}",
  journal = j-SCI-PROG,
  volume = "18",
  number = "3--4",
  pages = "169--181",
  month = "????",
  year = "2010",
  CODEN = "SCIPEV",
  DOI = "https://doi.org/10.3233/SPR-2010-0307",
  ISSN = "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L = "1058-9244",
  bibdate = "Tue Dec 13 19:01:33 MST 2011",
  bibsource = "http://www.iospress.nl/journal/scientific-programming/;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib;
      http://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Scientific Programming",
  journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@Article{Isaila:2010:SMP,
  author = "Florin Isaila and Francisco Javier Garcia Blas and
      Jes{\'u}s Carretero and Wei-keng Liao and Alok
      Choudhary",
  title = "A Scalable {Message Passing Interface} Implementation
      of an Ad-Hoc Parallel {I/O} system",
  journal = j-IJHPCA,
  volume = "24",
  number = "2",
  pages = "164--184",
  month = may,
  year = "2010",
  CODEN = "IHPCFL",
  DOI = "https://doi.org/10.1177/1094342009347890",
  ISSN = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L = "1094-3420",
  bibdate = "Tue Aug 31 09:59:46 MDT 2010",
  bibsource = "http://hpc.sagepub.com/content/24/2.toc;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://hpc.sagepub.com/content/24/2/164.full.pdf+html",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of High Performance Computing
      Applications",
  journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Januszewski:2010:ANS,
  author = "M. Januszewski and M. Kostur",
  title = "Accelerating numerical solution of stochastic
      differential equations with {CUDA}",
  journal = j-COMP-PHYS-COMM,
  volume = "181",
  number = "1",
  pages = "183--188",
  month = jan,
  year = "2010",
  CODEN = "CPHCBZ",
  DOI = "https://doi.org/10.1016/j.cpc.2009.09.009",
  ISSN = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L = "0010-4655",
  bibdate = "Sat Feb 11 09:54:27 MST 2012",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S0010465509002999",
  acknowledgement = ack-nhfb,
  fjournal = "Computer Physics Communications",
  journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Jost:2010:EUH,
  author = "Gabriele Jost and Bob Robins",
  title = "Experiences using hybrid {MPI\slash OpenMP} in the
      real world: Parallelization of a {$3$D} {CFD} solver
      for multi-core node clusters",
  journal = j-SCI-PROG,
  volume = "18",
  number = "3--4",
  pages = "127--138",
  month = "????",
  year = "2010",
  CODEN = "SCIPEV",
  DOI = "https://doi.org/10.3233/SPR-2010-0308",
  ISSN = "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L = "1058-9244",
  bibdate = "Tue Dec 13 19:01:33 MST 2011",
  bibsource = "http://www.iospress.nl/journal/scientific-programming/;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib;
      http://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Scientific Programming",
  journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@InProceedings{Kamal:2010:EIN,
  author = "A. A. Kamal and A. M. Youssef",
  title = "Enhanced implementation of the {NTRUEncrypt} algorithm
      using graphics cards",
  crossref = "Chaudhuri:2010:PIC",
  pages = "168--174",
  year = "2010",
  DOI = "https://doi.org/10.1109/PDGC.2010.5679887",
  bibdate = "Thu Apr 21 10:40:48 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "The NTRU encryption algorithm, also known as
      NTRUEncrypt, is a parameterized family of lattice-based
      public key cryptosystems that has been accepted to the
      IEEE P1363 standards under the specifications for
      lattice-based public-key cryptography (IEEE P1363.1).
      The operations of the NTRU encryption algorithm show
      good characteristics for data parallel processing which
      makes the NTRU a good candidate to benefit from the
      high degree of parallelism available in modern graphics
      processing units (GPUs). In this paper, we investigate
      different GPU implementation options for the NTRU
      encryption algorithm. Our implementation, on the NVIDIA
      GTX275 GPU, using the CUDA framework, achieves about 77
      MB/s for NTRU with the parameter set $ (N, q, p) =
      (1171, 2048, 3) $.",
  acknowledgement = ack-nhfb,
  keywords = "ANSI X9.98-2010; NTRUEncrypt",
}
@Article{Kapinos:2010:PPP,
  author = "Paul Kapinos and Dieter an Mey",
  title = "Productivity and Performance Portability of the
      {OpenMP 3.0} Tasking Concept When Applied to an
      Engineering Code Written in {Fortran 95}",
  journal = j-INT-J-PARALLEL-PROG,
  volume = "38",
  number = "5--6",
  pages = "379--395",
  month = oct,
  year = "2010",
  CODEN = "IJPPE5",
  ISSN = "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L = "0885-7458",
  bibdate = "Wed Sep 1 16:06:49 MDT 2010",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=38&issue=5;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=38&issue=5&spage=379",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of Parallel Programming",
  journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Khanna:2010:NMG,
  author = "Gaurav Khanna and Justin McKennon",
  title = "Numerical modeling of gravitational wave sources
      accelerated by {OpenCL}",
  journal = j-COMP-PHYS-COMM,
  volume = "181",
  number = "9",
  pages = "1605--1611",
  month = sep,
  year = "2010",
  CODEN = "CPHCBZ",
  DOI = "https://doi.org/10.1016/j.cpc.2010.05.014",
  ISSN = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L = "0010-4655",
  bibdate = "Sat Feb 11 09:54:30 MST 2012",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S0010465510001682",
  acknowledgement = ack-nhfb,
  fjournal = "Computer Physics Communications",
  journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Book{Kirk:2010:PMP,
  author = "David B. Kirk and Wen-mei W. Hwu",
  title = "Programming Massively Parallel Processors: a Hands-on
      Approach",
  publisher = pub-MORGAN-KAUFMANN,
  address = pub-MORGAN-KAUFMANN:adr,
  pages = "xviii + 258",
  year = "2010",
  ISBN = "0-12-381472-3",
  ISBN-13 = "978-0-12-381472-2",
  LCCN = "QA76.642 .K57 2010",
  bibdate = "Thu Jul 29 13:33:50 MDT 2010",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
      http://www.math.utah.edu/pub/tex/bib/master.bib;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib;
      http://www.math.utah.edu/pub/tex/bib/scpe.bib;
      http://www.math.utah.edu/pub/tex/bib/unix.bib;
      z3950.bibsys.no:2100/BIBSYS;
      z3950.loc.gov:7090/Voyager",
  note = "Chapter 7 (pages 125--140) discusses GPU
      floating-point considerations.",
  acknowledgement = ack-nhfb,
  keywords = "CUDA; nVIDIA",
  libnote = "Not yet in my library.",
  subject = "parallel programming (computer science); parallel
      processing (electronic computers); multiprocessors;
      computer architecture",
  tableofcontents = "1: Introduction \\
      1.1 GPUs as Parallel Computers \\
      1.2 Architecture of a Modern GPU \\
      1.3 Why More Speed or Parallelism? \\
      1.4 Parallel Programming Languages and Models \\
      1.5 Overarching Goals \\
      1.6 Organization of the Book \\
      2: History of GPU Computing \\
      2.1. Evolution of Graphics Pipelines The Era of Fixed
      Function Graphics Pipeline Evolution of Programmable
      Real-Time Graphics Unified Graphics and Computing
      Processors \\
      2.2. GPGPU: an Intermediate Step Scalable GPUs Recent
      Developments Future Trends \\
      3: Introduction to CUDA \\
      3.1. Data Parallelism \\
      3.2. CUDA Program Structure \\
      3.3. A Matrix--Matrix Multiplication Example \\
      3.4. Device Memories and Data Transfer \\
      3.5. Kernel Functions and Threading \\
      3.6. Summary Function Declarations Kernel Launch
      Predefined Variables Runtime API \\
      4: CUDA Threads \\
      4.1. CUDA Thread Organization \\
      4.2. More on BlockIdx and ThreadIdx \\
      4.3. Synchronization and Transparent Scalability \\
      4.4. Thread Assignment \\
      4.5. Thread Scheduling and Latency Tolerance \\
      4.6. Summary \\
      5: CUDA Memories \\
      5.1. Importance of Memory Access Efficiency \\
      5.2. CUDA Device Memory Types \\
      5.3. A Strategy for Reducing Global Memory Traffic \\
      5.4. Memory as a Limiting Factor to Parallelism \\
      5.5. Summary \\
      6: Performance Considerations \\
      6.1. More on Thread Execution \\
      6.2. Global Memory Bandwidth \\
      6.3. Dynamic Partitioning of SM Resources \\
      6.4. Data Prefetching \\
      6.5. Instruction Mix \\
      6.6. Thread Granularity \\
      6.7. Measured Performance and Summary \\
      7: Floating-Point Considerations \\
      7.1. Floating-Point Format Normalized representation of
      M Excess encoding of E \\
      7.2. Representable Numbers \\
      7.3. Special Bit Patterns and Precision \\
      7.4. Arithmetic Accuracy and Rounding \\
      7.5. Algorithm Considerations \\
      7.6. Summary \\
      8: Application Case Study I \\
      Advanced MRI Reconstruction \\
      8.1. Application Background \\
      8.2. Iterative Reconstruction \\
      8.3. Computing FHd \\
      Step 1: Determine the Kernel Parallelism Structure \\
      Step 2: Getting Around the Memory Bandwidth Limitation
      \\
      Step 3: Use Hardware Trigonometry Functions \\
      Step 4: Experimental Performance Testing \\
      8.4. Final Evaluation \\
      9: Application Case Study II \\
      Molecular Visualization and Analysis \\
      9.1. Application Background \\
      9.2. A Simple Kernel Implementation \\
      9.3. Instruction Execution Efficiency \\
      9.4. Memory Coalescing \\
      9.5. Additional Performance Comparisons \\
      9.6. Using Multiple GPUs \\
      10: Parallel Programming and Computational Thinking \\
      10.1. Goals of Parallel Programming \\
      10.2. Problem Decomposition \\
      10.3. Algorithm Selection \\
      10.4. Computational Thinking \\
      11: A Brief Introduction to OpenCL \\
      11.1. Background \\
      11.2. Data Parallelism Model \\
      11.3. Device Architecture \\
      11.4. Kernel Functions \\
      11.5. Device Management and Kernel Launch \\
      11.6. Electrostatic Potential Map in OpenCL \\
      11.7. Summary \\
      12: Conclusion and Future Outlook \\
      12.1. Goals Revisited \\
      12.2. Memory Architecture Evolution \\
      12.3. Kernel Execution Control Evolution \\
      12.4. Core Performance \\
      12.5. Programming Environment \\
      12.6. A Bright Outlook \\
      Appendix A: Matrix Multiplication Example Code \\
      Appendix B: Speed and feed of current generation CUDA
      devices",
}
@Article{Komatitsch:2010:HOF,
  author = "Dimitri Komatitsch and Gordon Erlebacher and Dominik
      G{\"o}ddeke and David Mich{\'e}a",
  title = "High-order finite-element seismic wave propagation
      modeling with {MPI} on a large {GPU} cluster",
  journal = j-J-COMPUT-PHYS,
  volume = "229",
  number = "20",
  pages = "7692--7714",
  day = "1",
  month = oct,
  year = "2010",
  CODEN = "JCTPAH",
  DOI = "https://doi.org/10.1016/j.jcp.2010.06.024",
  ISSN = "0021-9991 (print), 1090-2716 (electronic)",
  ISSN-L = "0021-9991",
  bibdate = "Sat Dec 31 11:58:42 MST 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S0021999110003396",
  acknowledgement = ack-nhfb,
  fjournal = "Journal of Computational Physics",
  journal-URL = "http://www.sciencedirect.com/science/journal/00219991",
}
@Article{Koval:2010:USB,
  author = "Peter Koval and J. D. Talman",
  title = "Update of spherical {Bessel} transform: {FFTW} and
      {OpenMP}",
  journal = j-COMP-PHYS-COMM,
  volume = "181",
  number = "12",
  pages = "2212--2213",
  month = dec,
  year = "2010",
  CODEN = "CPHCBZ",
  DOI = "https://doi.org/10.1016/j.cpc.2010.08.024",
  ISSN = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L = "0010-4655",
  bibdate = "Sat Feb 11 09:54:31 MST 2012",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S0010465510003188",
  acknowledgement = ack-nhfb,
  fjournal = "Computer Physics Communications",
  journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Kwon:2010:SPC,
  author = "Seongnam Kwon and Soonhoi Ha",
  title = "Serialized parallel code generation framework for
      {MPSoC}",
  journal = j-TODAES,
  volume = "15",
  number = "2",
  pages = "11:1--11:??",
  month = feb,
  year = "2010",
  CODEN = "ATASFO",
  DOI = "https://doi.org/10.1145/1698759.1698761",
  ISSN = "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L = "1084-4309",
  bibdate = "Mon Mar 15 11:19:08 MDT 2010",
  bibsource = "http://www.acm.org/pubs/contents/journals/todaes/;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "The models of computations that express concurrency
      naturally are preferred for initial specification of
      MPSoC system, since popular programming languages such
      as C and C++ are designed for sequential execution. In
      our previous work, we proposed a design framework where
      two models are used for the initial specification of
      the system behavior; task model at the top level and
      dataflow model inside each task. After the partition
      and mapping process is performed with each architecture
      candidate, the target code is automatically generated
      for both Design-Space Exploration (DSE) and final
      implementation. In this article, we focus on parallel
      code generation for MPSoC, proposing two main
      techniques. The first is to express functional and data
      parallelism differently following the partition and
      mapping decision. In the proposed technique, the
      generated code consists of multiple tasks running
      concurrently, which achieves functional parallelism. On
      the other hand, we use OpenMP directives to express
      data parallelism inside a task. Second is to adopt the
      code serialization technique to execute a multitasking
      application without OS scheduler, aiming to generate
      the highly portable code on various platforms for an
      efficient DSE process. We extend the previous code
      serialization techniques to multiprocessor systems and
      utilize the formal properties of the dataflow model for
      efficient code generation. The experiments including
      H.263 codec example show the viability of the proposed
      technique and the efficiency of the generated code.",
  acknowledgement = ack-nhfb,
  articleno = "11",
  fjournal = "ACM Transactions on Design Automation of Electronic
      Systems (TODAES)",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J776",
  keywords = "design-space exploration; Embedded software;
      multiprocessor system on chip; parallel programming;
      software generation",
}
@Article{Lastovetsky:2010:RAP,
  author = "Alexey Lastovetsky and Tahar Kechadi",
  title = "Recent Advances in {Parallel Virtual Machine} and
      {Message Passing Interface}",
  journal = j-IJHPCA,
  volume = "24",
  number = "1",
  pages = "3--4",
  month = feb,
  year = "2010",
  CODEN = "IHPCFL",
  DOI = "https://doi.org/10.1177/1094342009359523",
  ISSN = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L = "1094-3420",
  bibdate = "Tue Aug 31 09:59:45 MDT 2010",
  bibsource = "http://hpc.sagepub.com/content/24/1.toc;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://hpc.sagepub.com/content/24/1/3.full.pdf+html",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of High Performance Computing
      Applications",
  journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Li:2010:SVC,
  author = "Guodong Li and Ganesh Gopalakrishnan and Robert M.
      Kirby and Dan Quinlan",
  title = "A symbolic verifier for {CUDA} programs",
  journal = j-SIGPLAN,
  volume = "45",
  number = "5",
  pages = "357--358",
  month = may,
  year = "2010",
  CODEN = "SINODQ",
  DOI = "https://doi.org/10.1145/1837853.1693512",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
      (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Tue Aug 31 22:39:18 MDT 2010",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
      http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract = "We present a preliminary automated verifier based on
      mechanical decision procedures which is able to prove
      functional correctness of CUDA programs and guarantee
      to detect bugs such as race conditions. We also employ
      a symbolic partial order reduction (POR) technique to
      mitigate the interleaving explosion problem.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGPLAN Notices",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords = "cuda; formal verification; SPMD; symbolic analysis",
}
@Article{Lin:2010:TLS,
  author = "Paul T. Lin and John N. Shadid",
  title = "Towards large-scale multi-socket, multicore parallel
      simulations: Performance of an {MPI}-only semiconductor
      device simulator",
  journal = j-J-COMPUT-PHYS,
  volume = "229",
  number = "19",
  pages = "6804--6818",
  day = "20",
  month = sep,
  year = "2010",
  CODEN = "JCTPAH",
  DOI = "https://doi.org/10.1016/j.jcp.2010.05.023",
  ISSN = "0021-9991 (print), 1090-2716 (electronic)",
  ISSN-L = "0021-9991",
  bibdate = "Sat Dec 31 11:58:37 MST 2011",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S0021999110002846",
  acknowledgement = ack-nhfb,
  fjournal = "Journal of Computational Physics",
  journal-URL = "http://www.sciencedirect.com/science/journal/00219991",
}
@Article{Liu:2010:RTC,
  author = "Fuchang Liu and Takahiro Harada and Youngeun Lee and
      Young J. Kim",
  title = "Real-time collision culling of a million bodies on
      graphics processing units",
  journal = j-TOG,
  volume = "29",
  number = "6",
  pages = "154:1--154:??",
  month = dec,
  year = "2010",
  CODEN = "ATGRDF",
  DOI = "https://doi.org/10.1145/1882261.1866180",
  ISSN = "0730-0301 (print), 1557-7368 (electronic)",
  ISSN-L = "0730-0301",
  bibdate = "Thu Dec 9 11:41:01 MST 2010",
  bibsource = "http://www.acm.org/pubs/contents/journals/tog/;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib;
      http://www.math.utah.edu/pub/tex/bib/tog.bib",
  abstract = "We cull collisions between very large numbers of
      moving bodies using graphics processing units (GPUs).
      To perform massively parallel sweep-and-prune (SaP), we
      mitigate the great density of intervals along the axis
      of sweep by using principal component analysis to
      choose the best sweep direction, together with spatial
      subdivisions to further reduce the number of false
      positive overlaps. Our algorithm implemented entirely
      on GPUs using the CUDA framework can handle a million
      moving objects at interactive rates. As application of
      our algorithm, we demonstrate the real-time simulation
      of very large numbers of particles and rigid-body
      dynamics.",
  acknowledgement = ack-nhfb,
  articleno = "154",
  fjournal = "ACM Transactions on Graphics",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J778",
}
@Article{Marjanovic:2010:ECC,
  author = "Vladimir Marjanovic and Jes{\'u}s Labarta and Eduard
      Ayguad{\'e} and Mateo Valero",
  title = "Effective communication and computation overlap with
      hybrid {MPI\slash SMPSs}",
  journal = j-SIGPLAN,
  volume = "45",
  number = "5",
  pages = "337--338",
  month = may,
  year = "2010",
  CODEN = "SINODQ",
  DOI = "https://doi.org/10.1145/1837853.1693502",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
      (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Tue Aug 31 22:39:18 MDT 2010",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "Communication overhead is one of the dominant factors
      affecting performance in high-performance computing
      systems. To reduce the negative impact of
      communication, programmers overlap communication and
      computation by using asynchronous communication
      primitives. This increases code complexity, requiring
      more development effort and making less readable
      programs. This paper presents the hybrid use of MPI and
      SMPSs (SMP superscalar, a task-based shared-memory
      programming model) that allows the programmer to easily
      introduce the asynchrony necessary to overlap
      communication and computation. We demonstrate the
      hybrid use of MPI/SMPSs with the high-performance
      LINPACK benchmark (HPL), and compare it to the pure MPI
      implementation, which uses the look-ahead technique to
      overlap communication and computation. The hybrid
      MPI/SMPSs version significantly improves the
      performance of the pure MPI version, getting close to
      the asymptotic performance at medium problem sizes and
      still getting significant benefits at small/large
      problem sizes.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGPLAN Notices",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords = "hybrid MPI/SMPSs; LINPACK; MPI; parallel programming
      model",
}
@Article{Molnar:2010:APM,
  author = "F. {Moln{\'a}r, Jr.} and T. Szak{\'a}ly and R.
      M{\'e}sz{\'a}ros and I. Lagzi",
  title = "Air pollution modelling using a {Graphics Processing
      Unit} with {CUDA}",
  journal = j-COMP-PHYS-COMM,
  volume = "181",
  number = "1",
  pages = "105--112",
  month = jan,
  year = "2010",
  CODEN = "CPHCBZ",
  DOI = "https://doi.org/10.1016/j.cpc.2009.09.008",
  ISSN = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L = "0010-4655",
  bibdate = "Sat Feb 11 09:54:27 MST 2012",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.sciencedirect.com/science/article/pii/S0010465509002872",
  acknowledgement = ack-nhfb,
  fjournal = "Computer Physics Communications",
  journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Muller:2010:SMA,
  author = "Matthias S. M{\"u}ller and Matthijs van Waveren and
      Ron Lieberman and Brian Whitney and Hideki Saito and
      Kalyan Kumaran and John Baron and William C. Brantley
      and Chris Parrott and Tom Elken and Huiyu Feng and Carl
      Ponder",
  title = "{SPEC MPI2007} --- an application benchmark suite for
      parallel systems using {MPI}",
  journal = j-CCPE,
  volume = "22",
  number = "2",
  pages = "191--205",
  month = feb,
  year = "2010",
  CODEN = "CCPEBO",
  DOI = "https://doi.org/10.1002/cpe.1535",
  ISSN = "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L = "1532-0626",
  bibdate = "Mon Dec 5 10:08:41 MST 2011",
  bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
      http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Concurrency and Computation: Prac\-tice and
      Experience",
  journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate = "2 Dec 2009",
}
@Article{Nesterov:2010:SPT,
  author = "Oleksandr Nesterov",
  title = "A simple parallelization technique with {MPI} for
      ocean circulation models",
  journal = j-J-PAR-DIST-COMP,
  volume = "70",
  number = "1",
  pages = "35--44",
  month = jan,
  year = "2010",
  CODEN = "JPDCER",
  ISSN = "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L = "0743-7315",
  bibdate = "Wed Sep 1 16:27:27 MDT 2010",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
      http://www.sciencedirect.com/science/journal/07437315",
  acknowledgement = ack-nhfb,
  fjournal = "Journal of Parallel and Distributed Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Nunez:2010:NTS,
  author = "Alberto N{\'u}{\~n}ez and Javier Fern{\'a}ndez and
      Jose D. Garcia and F{\'e}lix Garcia and Jes{\'u}s
      Carretero",
  title = "New techniques for simulating high performance {MPI}
      applications on large storage networks",
  journal = j-J-SUPERCOMPUTING,
  volume = "51",
  number = "1",
  pages = "40--57",
  month = jan,
  year = "2010",
  CODEN = "JOSUED",
  ISSN = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L = "0920-8542",
  bibdate = "Wed Aug 25 08:38:45 MDT 2010",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=51&issue=1;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=51&issue=1&spage=40",
  acknowledgement = ack-nhfb,
  fjournal = "The Journal of Supercomputing",
  journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Okitsu:2010:HPC,
  author = "Yusuke Okitsu and Fumihiko Ino and Kenichi Hagihara",
  title = "High-performance cone beam reconstruction using {CUDA}
      compatible {GPUs}",
  journal = j-PARALLEL-COMPUTING,
  volume = "36",
  number = "2--3",
  pages = "129--141",
  month = feb # "\slash " # mar,
  year = "2010",
  CODEN = "PACOEJ",
  ISSN = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L = "0167-8191",
  bibdate = "Thu Sep 2 17:51:12 MDT 2010",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Parallel Computing",
  journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Olivier:2010:COO,
  author = "Stephen L. Olivier and Jan F. Prins",
  title = "Comparison of {OpenMP 3.0} and Other Task Parallel
      Frameworks on Unbalanced Task Graphs",
  journal = j-INT-J-PARALLEL-PROG,
  volume = "38",
  number = "5--6",
  pages = "341--360",
  month = oct,
  year = "2010",
  CODEN = "IJPPE5",
  ISSN = "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L = "0885-7458",
  bibdate = "Wed Sep 1 16:06:49 MDT 2010",
  bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=38&issue=5;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=38&issue=5&spage=341",
  acknowledgement = ack-nhfb,
  fjournal = "International Journal of Parallel Programming",
  journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Pan:2010:CPS,
  author = "Heidi Pan and Benjamin Hindman and Krste
      Asanovi{\'c}",
  title = "Composing parallel software efficiently with {Lithe}",
  journal = j-SIGPLAN,
  volume = "45",
  number = "6",
  pages = "376--387",
  month = jun,
  year = "2010",
  CODEN = "SINODQ",
  DOI = "https://doi.org/10.1145/1809028.1806639",
  ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
      (electronic)",
  ISSN-L = "0362-1340",
  bibdate = "Fri Oct 8 17:53:18 MDT 2010",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract = "Applications composed of multiple parallel libraries
      perform poorly when those libraries interfere with one
      another by obliviously using the same physical cores,
      leading to destructive resource oversubscription. This
      paper presents the design and implementation of {\em
      Lithe}, a low-level substrate that provides the basic
      primitives and a standard interface for composing
      parallel codes efficiently. Lithe can be inserted
      underneath the runtimes of legacy parallel libraries to
      provide {\em bolt-on\/} composability without needing
      to change existing application code. Lithe can also
      serve as the foundation for building new parallel
      abstractions and libraries that automatically
      interoperate with one another.\par
      In this paper, we show versions of Threading Building
      Blocks (TBB) and OpenMP perform competitively with
      their original implementations when ported to Lithe.
      Furthermore, for two applications composed of multiple
      parallel libraries, we show that leveraging our
      substrate outperforms their original, even expertly
      tuned, implementations.",
  acknowledgement = ack-nhfb,
  fjournal = "ACM SIGPLAN Notices",
  journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
  keywords = "composability; cooperative scheduling; hierarchical
      scheduling; oversubscription; parallelism; resource
      management; user-level scheduling",
}
@Article{Pervez:2010:FMA,
  author = "Salman Pervez and Ganesh Gopalakrishnan and Robert M.
      Kirby and Rajeev Thakur and William Gropp",
  title = "Formal methods applied to high-performance computing
      software design: a case study of {MPI} one-sided
      communication-based locking",
  journal = j-SPE,
  volume = "40",
  number = "1",
  pages = "23--43",
  day = "??",
  month = jan,
  year = "2010",
  CODEN = "SPEXBL",
  DOI = "https://doi.org/10.1002/spe.946",
  ISSN = "0038-0644 (print), 1097-024X (electronic)",
  ISSN-L = "0038-0644",
  bibdate = "Wed Mar 17 10:16:21 MDT 2010",
  bibsource = "http://www.interscience.wiley.com/jpages/0038-0644;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib;
      http://www3.interscience.wiley.com/journalfinder.html",
  acknowledgement = ack-nhfb,
  fjournal = "Soft{\-}ware---Prac{\-}tice and Experience",
  journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1097-024X",
  onlinedate = "Dec 21 2009 3:42AM",
}
@Article{Preissl:2010:OCC,
  author = "Robert Preissl and Alice Koniges and Stephan Ethier
      and Weixing Wang and Nathan Wichmann",
  title = "Overlapping communication with computation using
      {OpenMP} tasks on the {GTS} magnetic fusion code",
  journal = j-SCI-PROG,
  volume = "18",
  number = "3--4",
  pages = "139--151",
  month = "????",
  year = "2010",
  CODEN = "SCIPEV",
  DOI = "https://doi.org/10.3233/SPR-2010-0311",
  ISSN = "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L = "1058-9244",
  bibdate = "Tue Dec 13 19:01:33 MST 2011",
  bibsource = "http://www.iospress.nl/journal/scientific-programming/;
      http://www.math.utah.edu/pub/tex/bib/pvm.bib;
      http://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
  acknowledgement = ack-nhfb,
  fjournal = "Scientific Programming",
  journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@Article{Preissl:2010:TMS,
  author = "Robert Preissl and Martin Schulz and Dieter
      Kranzlm{\"u}ller and Bronis R. de Supinski and Daniel
      J. Quinlan",
  title = "Transforming {MPI} source code based on communication
      patterns",
  journal = j-FUT-GEN-COMP-SYS,
  volume = "26",
  number = "1",
  pages = "147--154",
  month = jan,
  year = "2010",
  CODEN = "FGSEVI",
  ISSN = "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L = "0167-739X",
  bibdate = "Sat Sep 11 13:08:16 MDT 2010",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
      http://www.sciencedirect.com/science/journal/0167739X",
  acknowledgement = ack-nhfb,
  fjournal = "Future Generation Computer Systems",
  journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Sainio:2010:CGA,
author = "J. Sainio",
title = "{CUDAEASY} --- a {GPU} accelerated cosmological
lattice program",
journal = j-COMP-PHYS-COMM,
volume = "181",
number = "5",
pages = "906--912",
month = may,
year = "2010",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/j.cpc.2010.01.002",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Sat Feb 11 09:54:29 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465510000159",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Saldana:2010:MPM,
author = "Manuel Salda{\~n}a and Arun Patel and Christopher
Madill and Daniel Nunes and Danyao Wang and Paul Chow
and Ralph Wittig and Henry Styles and Andrew Putnam",
title = "{MPI} as a Programming Model for High-Performance
Reconfigurable Computers",
journal = j-TRETS,
volume = "3",
number = "4",
pages = "22:1--22:??",
month = nov,
year = "2010",
CODEN = "????",
DOI = "https://doi.org/10.1145/1862648.1862652",
ISSN = "1936-7406 (print), 1936-7414 (electronic)",
ISSN-L = "1936-7406",
bibdate = "Tue Nov 23 11:26:33 MST 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
articleno = "22",
fjournal = "ACM Transactions on Reconfigurable Technology and
Systems",
journal-URL = "http://portal.acm.org/toc.cfm?id=J1151",
}
@Book{Sanders:2010:CEI,
author = "Jason Sanders and Edward Kandrot",
title = "{CUDA} by Example: an Introduction to General-purpose
{GPU} Programming",
publisher = pub-AW,
address = pub-AW:adr,
pages = "xix + 290",
year = "2010",
ISBN = "0-13-138768-5",
ISBN-13 = "978-0-13-138768-3",
LCCN = "QA76.76.A65",
bibdate = "Wed Jul 28 23:24:12 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/master.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/scpe.bib;
http://www.math.utah.edu/pub/tex/bib/unix.bib;
z3950.gbv.de:20011/gvk",
abstract = "CUDA is a computing architecture designed to
facilitate the development of parallel programs. This
book shows programmers how to employ this new
technology. Each area of CUDA development is introduced
through working examples. After a concise introduction
to the CUDA platform and architecture, as well as a
quick-start guide to CUDA C, the book details the
techniques and trade-offs associated with each key CUDA
feature.",
acknowledgement = ack-nhfb,
keywords = "CUDA; GPU",
subject = "application software; development; computer
architecture; parallel programming (computer science)",
tableofcontents = "Why CUDA? why now? \\
Getting started \\
Introduction to CUDA C \\
Parallel programming in CUDA C \\
Thread cooperation \\
Constant memory and events \\
Texture memory \\
Graphics interoperability \\
Atomics \\
Streams \\
CUDA C on multiple GPUs \\
The final countdown \\
Appendix A: Advanced atomics",
}
@Article{Sandes:2010:CUG,
author = "Edans Flavius O. Sandes and Alba Cristina M. A. de
Melo",
title = "{CUDAlign}: using {GPU} to accelerate the comparison
of megabase genomic sequences",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "137--146",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693473",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Biological sequence comparison is a very important
operation in Bioinformatics. Even though there do exist
exact methods to compare biological sequences, these
methods are often neglected due to their quadratic time
and space complexity. In order to accelerate these
methods, many GPU algorithms were proposed in the
literature. Nevertheless, all of them restrict the size
of the smallest sequence in such a way that Megabase
genome comparison is prevented. In this paper, we
propose and evaluate CUDAlign, a GPU algorithm that is
able to compare Megabase biological sequences with an
exact Smith--Waterman affine gap variant. CUDAlign was
implemented in CUDA and tested in two GPU boards,
separately. For real sequences whose size range from
1MBP (Megabase Pairs) to 47MBP, a close to uniform
GCUPS (Giga Cells Updates per Second) was obtained,
showing the potential scalability of our approach.
Also, CUDAlign was able to compare the human chromosome
21 and the chimpanzee chromosome 22. This operation
took 21 hours on GeForce GTX 280, resulting in a peak
performance of 20.375 GCUPS. As far as we know, this is
the first time such huge chromosomes are compared with
an exact method.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "biological sequence comparison; GPU; Smith--Waterman",
}
@Article{Segovia:2010:PPN,
author = "Alejandro Segovia",
title = "Parallel programming with {NVIDIA CUDA}",
journal = j-LINUX-J,
volume = "2010",
number = "200",
pages = "2:1--2:??",
month = dec,
year = "2010",
CODEN = "LIJOFX",
ISSN = "1075-3583 (print), 1938-3827 (electronic)",
ISSN-L = "1075-3583",
bibdate = "Mon Jan 10 10:01:27 MST 2011",
bibsource = "http://www.math.utah.edu/pub/tex/bib/linux-journal.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "Linux Journal",
journal-URL = "http://portal.acm.org/citation.cfm?id=J508",
}
@Article{Shi:2010:PAE,
author = "Haixiang Shi and Bertil Schmidt and Weiguo Liu and
Wolfgang M{\"u}ller-Wittig",
title = "A Parallel Algorithm for Error Correction in
High-Throughput Short-Read Data on {CUDA}-Enabled
Graphics Hardware",
journal = j-J-COMPUT-BIOL,
volume = "17",
number = "4",
pages = "603--615",
month = apr,
year = "2010",
CODEN = "JCOBEM",
DOI = "https://doi.org/10.1089/cmb.2009.0062",
ISSN = "1066-5277 (print), 1557-8666 (electronic)",
ISSN-L = "1066-5277",
bibdate = "Sat Jun 1 09:49:51 MDT 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jcomputbiol.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.liebertpub.com/doi/abs/10.1089/cmb.2009.0062;
https://www.liebertpub.com/doi/pdf/10.1089/cmb.2009.0062",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computational Biology",
journal-URL = "https://www.liebertpub.com/loi/cmb/",
onlinedate = "28 April 2010",
}
@Article{Stone:2010:OPP,
author = "John E. Stone and David Gohara and Guochun Shi",
title = "{OpenCL}: a Parallel Programming Standard for
Heterogeneous Computing Systems",
journal = j-COMPUT-SCI-ENG,
volume = "12",
number = "3",
pages = "66--73",
month = may # "\slash " # jun,
year = "2010",
CODEN = "CSENFA",
DOI = "https://doi.org/10.1109/MCSE.2010.69",
ISSN = "1521-9615 (print), 1558-366X (electronic)",
ISSN-L = "1521-9615",
bibdate = "Thu May 13 11:08:14 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Computing in Science and Engineering",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}
@InProceedings{Suciu:2010:PIN,
author = "A. Suciu and I. Nagy and K. Marton and I. Pinca",
editor = "Ioan Alfred Letia",
booktitle = "{Proceedings, 2010 IEEE 6th International Conference
on Intelligent Computer Communication and Processing:
Cluj-Napoca, Romania, August 26--28, 2010}",
title = "Parallel implementation of the {NIST Statistical Test
Suite}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
bookpages = "xiii + 487",
pages = "363--368",
year = "2010",
DOI = "https://doi.org/10.1109/ICCP.2010.5606412",
ISBN = "1-4244-8228-3 (print), 1-4244-8230-5 (electronic)",
ISBN-13 = "978-1-4244-8228-3 (print), 978-1-4244-8230-6
(electronic)",
LCCN = "QA76.76.E95",
bibdate = "Tue Jan 31 14:22:16 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/prng.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog number CFP1009D-ART.",
URL = "http://ieeexplore.ieee.org/servlet/opac?punumber=5598248",
acknowledgement = ack-nhfb,
keywords = "OpenMP API",
onlinedate = "21 October 2010",
remark = "From the abstract: ``Experimental results show a very
significant speedup of up to 103 times compared to the
original version.''",
}
@Article{Traff:2010:SCM,
author = "Jesper Larsson Tr{\"a}ff and William D. Gropp and Rajeev
Thakur",
title = "Self-Consistent {MPI} Performance Guidelines",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "21",
number = "5",
pages = "698--709",
month = may,
year = "2010",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2009.120",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Thu May 13 12:06:56 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@Article{Tzannes:2010:LBS,
author = "Alexandros Tzannes and George C. Caragea and Rajeev
Barua and Uzi Vishkin",
title = "Lazy binary-splitting: a run-time adaptive
work-stealing scheduler",
journal = j-SIGPLAN,
volume = "45",
number = "5",
pages = "179--190",
month = may,
year = "2010",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/1693453.1693479",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Aug 31 22:39:18 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "We present Lazy Binary Splitting (LBS), a user-level
scheduler of nested parallelism for shared-memory
multiprocessors that builds on existing Eager Binary
Splitting work-stealing (EBS) implemented in Intel's
Threading Building Blocks (TBB), but improves
performance and ease-of-programming. In its simplest
form (SP), EBS requires manual tuning by repeatedly
running the application under carefully controlled
conditions to determine a {\em stop-splitting-threshold
(sst)\/} for every do-all loop in the code. This
threshold limits the parallelism and prevents excessive
overheads for fine-grain parallelism. Besides being
tedious, this tuning also over-fits the code to some
particular dataset, platform and calling context of the
do-all loop, resulting in poor performance portability
for the code. LBS overcomes both the performance
portability and ease-of-programming pitfalls of a
manually fixed threshold by adapting dynamically to
run-time conditions without requiring tuning.\par
We compare LBS to Auto-Partitioner (AP), the latest
default scheduler of TBB, which does not require manual
tuning either but lacks context portability, and
outperform it by 38.9\% using TBB's default AP
configuration, and by 16.2\% after we tuned AP to our
experimental platform. We also compare LBS to SP by
manually finding SP's sst using a training dataset and
then running both on a different execution dataset. LBS
outperforms SP by 19.5\% on average, while allowing for
improved performance portability without requiring
tedious manual tuning. LBS also outperforms SP with
{\em sst=1}, its default value when undefined, by
56.7\%, and serializing work-stealing (SWS), another
work-stealer by 54.7\%. Finally, compared to
serializing inner parallelism (SI) which has been used
by OpenMP, LBS is 54.2\% faster.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIG{\-}PLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
keywords = "dynamic scheduling; load balancing; nested
parallelism; thread scheduling; work stealing",
}
@Article{Wendykier:2010:PCH,
author = "Piotr Wendykier and James G. Nagy",
title = "{Parallel Colt}: a High-Performance {Java} Library for
Scientific Computing and Image Processing",
journal = j-TOMS,
volume = "37",
number = "3",
pages = "31:1--31:22",
month = sep,
year = "2010",
CODEN = "ACMSCU",
DOI = "https://doi.org/10.1145/1824801.1824809",
ISSN = "0098-3500 (print), 1557-7295 (electronic)",
ISSN-L = "0098-3500",
bibdate = "Mon Sep 27 10:15:50 MDT 2010",
bibsource = "http://www.math.utah.edu/pub/tex/bib/java2010.bib;
http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/super.bib;
http://www.math.utah.edu/pub/tex/bib/toms.bib",
abstract = "Major breakthroughs in chip and software design have
been observed for the last nine years. In October 2001,
IBM released the world's first multicore processor:
POWER4. Six years later, in February 2007, NVIDIA made
a public release of CUDA SDK, a set of development
tools to write algorithms for execution on Graphic
Processing Units (GPUs). Although software vendors have
started working on parallelizing their products, the
vast majority of existing code is still sequential and
does not effectively utilize modern multicore CPUs and
manycore GPUs.\par
This article describes Parallel Colt, a multithreaded
Java library for scientific computing and image
processing. In addition to describing the design and
functionality of Parallel Colt, a comparison to MATLAB
is presented. Two ImageJ plugins for iterative image
deblurring and motion correction of PET brain images
are described as typical applications of this library.
Performance comparisons with MATLAB, including GPU
computations via AccelerEyes' Jacket toolbox are also
given.",
acknowledgement = ack-nhfb,
articleno = "31",
fjournal = "ACM Transactions on Mathematical Software (TOMS)",
journal-URL = "http://dl.acm.org/pub.cfm?id=J782",
keywords = "Deconvolution; FFT; inverse problems; iterative
methods; motion correction; multithreading; PET;
regularization",
}
@InProceedings{Zhao:2010:GMP,
author = "Kaiyong Zhao and Xiaowen Chu",
editor = "{IEEE}",
booktitle = "{IEEE 10th International Conference on Computer and
Information Technology (CIT), 2010: June 29, 2010--July
1, 2010, Bradford, West Yorkshire, UK}",
title = "{GPUMP}: a Multiple-Precision Integer Library for
{GPUs}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
bookpages = "xcix + 2987 (est.)",
pages = "1164--1168",
year = "2010",
DOI = "https://doi.org/10.1109/CIT.2010.211",
ISBN = "0-7695-4108-9 (print), 1-4244-7547-3",
ISBN-13 = "978-0-7695-4108-2 (print), 978-1-4244-7547-6",
LCCN = "????",
bibdate = "Thu Jan 16 10:33:01 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE Computer Society Order Number E4108. BMS Part
Number: CFP10355-CDR",
acknowledgement = ack-nhfb,
book-URL = "http://ieeexplore.ieee.org/servlet/opac?punumber=5575291",
keywords = "CUDA; GPU; multiple-precision algorithm;
multiple-precision comparison; multiple-precision
division; multiple-precision exponentiation;
multiple-precision modular addition; multiple-precision
modular multiplication; multiple-precision Montgomery
exponentiation; multiple-precision Montgomery
multiplication; multiple-precision Montgomery
reduction; multiple-precision multiplication; nVidia
GT200 GPU",
}
@Article{Agrawal:2011:PPS,
author = "Ankit Agrawal and Sanchit Misra and Daniel Honbo and
Alok Choudhary",
title = "Parallel pairwise statistical significance estimation
of local sequence alignment using {Message Passing
Interface} library",
journal = j-CCPE,
volume = "23",
number = "17",
pages = "2269--2279",
day = "10",
month = dec,
year = "2011",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.1798",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Mon Dec 5 10:09:00 MST 2011",
bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "7 Jul 2011",
}
@Article{Agullo:2011:QOM,
author = "Emmanuel Agullo and Camille Coti and Thomas Herault
and Julien Langou and Sylvain Peyronnet and Ala
Rezmerita and Franck Cappello and Jack Dongarra",
title = "{QCG-OMPI}: {MPI} applications on grids",
journal = j-FUT-GEN-COMP-SYS,
volume = "27",
number = "4",
pages = "357--369",
month = apr,
year = "2011",
CODEN = "FGSEVI",
ISSN = "0167-739X (print), 1872-7115 (electronic)",
ISSN-L = "0167-739X",
bibdate = "Tue Aug 30 11:43:29 MDT 2011",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/0167739X",
acknowledgement = ack-nhfb,
fjournal = "Future Generation Computer Systems",
journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Alonso:2011:NEM,
author = "P. Alonso and R. Cortina and F. J.
Mart{\'\i}nez-Zald{\'\i}var and J. Ranilla",
title = "{Neville} elimination on multi- and many-core systems:
{OpenMP}, {MPI} and {CUDA}",
journal = j-J-SUPERCOMPUTING,
volume = "58",
number = "2",
pages = "215--225",
month = nov,
year = "2011",
CODEN = "JOSUED",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Tue Dec 13 15:25:06 MST 2011",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=58&issue=2;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=58&issue=2&spage=215",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Balaji:2011:MMC,
author = "Pavan Balaji and Darius Buntinas and David Goodell and
William Gropp and Torsten Hoefler and Sameer Kumar and
Ewing Lusk and Rajeev Thakur and Jesper Larsson
Tr{\"a}ff",
title = "{MPI} on Millions of Cores",
journal = j-PARALLEL-PROCESS-LETT,
volume = "21",
number = "1",
pages = "45--60",
month = mar,
year = "2011",
CODEN = "PPLTEE",
DOI = "https://doi.org/10.1142/S0129626411000060",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
ISSN-L = "0129-6264",
bibdate = "Tue Feb 28 11:32:06 MST 2012",
bibsource = "http://ejournals.wspc.com.sg/ppl/;
http://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@Article{Balevic:2011:KAD,
author = "Ana Balevic and Bart Kienhuis",
title = "{KPN2GPU}: an approach for discovery and exploitation
of fine-grain data parallelism in process networks",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "66--71",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082173",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "With advances in manycore and accelerator
architectures, the high performance and embedded spaces
are rapidly converging. Emerging architectures feature
different forms of parallelism. The Polyhedral
Processes Networks (PPNs) are a proven model of choice
for automated generation of pipeline and task parallel
programs from sequential source code, however data
parallelism is not addressed. In this paper, we present
a systematic approach for identification and extraction
of fine grain data parallelism from the PPN
specification. The approach is implemented in a tool,
called kpn2gpu, which produces fine-grain data parallel
CUDA kernels for graphics processing units (GPUs).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
}
@Article{Bhattacharjee:2011:PLC,
author = "Abhishek Bhattacharjee and Gilberto Contreras and
Margaret Martonosi",
title = "Parallelization libraries: Characterizing and reducing
overheads",
journal = j-TACO,
volume = "8",
number = "1",
pages = "5:1--5:??",
month = apr,
year = "2011",
CODEN = "????",
DOI = "https://doi.org/10.1145/1952998.1953003",
ISSN = "1544-3566 (print), 1544-3973 (electronic)",
ISSN-L = "1544-3566",
bibdate = "Wed Apr 27 07:54:03 MDT 2011",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
abstract = "Creating efficient, scalable dynamic parallel runtime
systems for chip multiprocessors (CMPs) requires
understanding the overheads that manifest at high core
counts and small task sizes. In this article, we assess
these overheads on Intel's Threading Building Blocks
(TBB) and OpenMP. First, we use real hardware and
simulations to detail various scheduler and
synchronization overheads. We find that these can
amount to 47\% of TBB benchmark runtime and 80\% of
OpenMP benchmark runtime. Second, we propose load
balancing techniques such as occupancy-based and
criticality-guided task stealing, to boost performance.
Overall, our study provides valuable insights for
creating robust, scalable runtime libraries.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Architecture and Code Optimization
(TACO)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Camp:2011:SIU,
author = "David Camp and Christoph Garth and Hank Childs and
Dave Pugmire and Kenneth I. Joy",
title = "Streamline Integration Using {MPI}-Hybrid Parallelism
on a Large Multicore Architecture",
journal = j-IEEE-TRANS-VIS-COMPUT-GRAPH,
volume = "17",
number = "11",
pages = "1702--1713",
month = nov,
year = "2011",
CODEN = "ITVGEA",
DOI = "https://doi.org/10.1109/TVCG.2010.259",
ISSN = "1077-2626 (print), 1941-0506 (electronic), 2160-9306",
ISSN-L = "1077-2626",
bibdate = "Thu Sep 29 11:52:46 2011",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetransviscomputgraph.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Visualization and Computer
Graphics",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2945",
}
@Article{Cao:2011:OMM,
author = "Chao Cao and Yun-wen Chen and Yuning Wu and Erik
Deumens and Hai-Ping Cheng",
title = "{OPAL}: a multiscale multicenter simulation package
based on {MPI-2} protocol",
journal = j-IJQC,
volume = "111",
number = "15",
pages = "4020--4029",
month = dec,
year = "2011",
CODEN = "IJQCB2",
DOI = "https://doi.org/10.1002/qua.22916",
ISSN = "0020-7608 (print), 1097-461X (electronic)",
ISSN-L = "0020-7608",
bibdate = "Sat Oct 1 15:40:12 MDT 2011",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ijqc2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
ajournal = "Int. J. Quantum Chem.",
fjournal = "International Journal of Quantum Chemistry",
journal-URL = "http://www.interscience.wiley.com/jpages/0020-7608/",
onlinedate = "23 Nov 2010",
}
@Article{Catanzaro:2011:CCE,
author = "Bryan Catanzaro and Michael Garland and Kurt Keutzer",
title = "{Copperhead}: compiling an embedded data parallel
language",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "47--56",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941562",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/python.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "Modern parallel microprocessors deliver high
performance on applications that expose substantial
fine-grained data parallelism. Although data
parallelism is widely available in many computations,
implementing data parallel algorithms in low-level
languages is often an unnecessarily difficult task. The
characteristics of parallel microprocessors and the
limitations of current programming methodologies
motivate our design of Copperhead, a high-level data
parallel language embedded in Python. The Copperhead
programmer describes parallel computations via
composition of familiar data parallel primitives
supporting both flat and nested data parallel
computation on arrays of data. Copperhead programs are
expressed in a subset of the widely used Python
programming language and interoperate with standard
Python modules, including libraries for numeric
computation, data visualization, and analysis. In this
paper, we discuss the language, compiler, and runtime
features that enable Copperhead to efficiently execute
data parallel code. We define the restricted subset of
Python which Copperhead supports and introduce the
program analysis techniques necessary for compiling
Copperhead code into efficient low-level
implementations. We also outline the runtime support by
which Copperhead programs interoperate with standard
Python modules. We demonstrate the effectiveness of our
techniques with several examples targeting the CUDA
platform for parallel programming on GPUs. Copperhead
code is concise, on average requiring 3.6 times fewer
lines of code than CUDA, and the compiler generates
efficient code, yielding 45--100\% of the performance of
hand-crafted, well optimized CUDA code.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Chalkidis:2011:HPH,
author = "Georgios Chalkidis and Masao Nagasaki and Satoru
Miyano",
title = "High Performance Hybrid Functional {Petri} Net
Simulations of Biological Pathway Models on {CUDA}",
journal = j-TCBB,
volume = "8",
number = "6",
pages = "1545--1556",
month = nov,
year = "2011",
CODEN = "ITCBCY",
DOI = "https://doi.org/10.1109/TCBB.2010.118",
ISSN = "1545-5963 (print), 1557-9964 (electronic)",
ISSN-L = "1545-5963",
bibdate = "Sun Nov 6 06:45:50 MST 2011",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/tcbb.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE/ACM Transactions on Computational Biology and
Bioinformatics",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}
@Article{Czapinski:2011:TST,
author = "Michal Czapi{\'n}ski and Stuart Barnes",
title = "{Tabu Search} with two approaches to parallel flowshop
evaluation on {CUDA} platform",
journal = j-J-PAR-DIST-COMP,
volume = "71",
number = "6",
pages = "802--811",
month = jun,
year = "2011",
CODEN = "JPDCER",
DOI = "https://doi.org/10.1016/j.jpdc.2011.02.006",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Sat Feb 25 09:11:32 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/07437315",
URL = "http://www.sciencedirect.com/science/article/pii/S0743731511000384",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{delaAsuncion:2011:SOL,
author = "Marc de la Asunci{\'o}n and Jos{\'e} M. Mantas and
Manuel J. Castro",
title = "Simulation of one-layer shallow water systems on
multicore and {CUDA} architectures",
journal = j-J-SUPERCOMPUTING,
volume = "58",
number = "2",
pages = "206--214",
month = nov,
year = "2011",
CODEN = "JOSUED",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Tue Dec 13 15:25:06 MST 2011",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=58&issue=2;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=58&issue=2&spage=206",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Dohi:2011:GIO,
author = "Keisuke Dohi and Yuichiro Shibata and Kiyoshi Oguri
and Takafumi Fujimoto",
title = "{GPU} implementation and optimization of
electromagnetic simulation using the {FDTD} method for
antenna designing",
journal = j-COMP-ARCH-NEWS,
volume = "39",
number = "4",
pages = "26--31",
month = sep,
year = "2011",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2082156.2082163",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Tue Dec 20 17:53:58 MST 2011",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "This paper describes electromagnetical field
simulation using the 3D-FDTD method for antenna
designing on a CUDA-compatible GPU. We use the Split
Perfectly Matched Layer as an absorbing boundary
condition. As is well known, the 3D-FDTD method is a
kind of stencil computation and is considered better at
GPU implementation. In order to find the best blocking
size for the target GPU architecture, we empirically
explore a design space of blocking size. We also
propose a kernel fusing method as one of the efficient
optimization methods, which improves the total
performance about 10\% at the cost of a small increase
in memory usage.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
}
@Article{Dotsenko:2011:ATF,
author = "Yuri Dotsenko and Sara S. Baghsorkhi and Brandon Lloyd
and Naga K. Govindaraju",
title = "Auto-tuning of {Fast Fourier Transform} on graphics
processors",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "257--266",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941589",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "We present an auto-tuning framework for FFTs on
graphics processors (GPUs). Due to complex design of
the memory and compute subsystems on GPUs, the
performance of FFT kernels over the range of possible
input parameters can vary widely. We generate several
variants for each component of the FFT kernel that, for
different cases, are likely to perform well. Our
auto-tuner composes variants to generate kernels and
selects the best ones. We present heuristics to prune
the search space and profile only a small fraction of
all possible kernels. We compose optimized kernels to
improve the performance of larger FFT computations. We
implement the system using the NVIDIA CUDA API and
compare its performance to the state-of-the-art FFT
libraries. On a range of NVIDIA GPUs and input sizes,
our auto-tuned FFTs outperform the NVIDIA CUFFT 3.0
library by up to 38x and deliver up to 3x higher
performance compared to a manually-tuned FFT.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Ewedafe:2011:PID,
author = "Simon Uzezi Ewedafe and Rio Hirowati Shariffudin",
title = "Parallel Implementation of {$2$-D} Telegraphic
Equation on {MPI\slash PVM} Cluster",
journal = j-INT-J-PARALLEL-PROG,
volume = "39",
number = "2",
pages = "202--231",
month = apr,
year = "2011",
CODEN = "IJPPE5",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Tue Sep 6 21:08:27 MDT 2011",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=39&issue=2;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=39&issue=2&spage=202",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Filgueira:2011:ACE,
  author =       "Rosa Filgueira and David E. Singh and Jes{\'u}s
                 Carretero and Alejandro Calder{\'o}n and F{\'e}lix
                 Garc{\'\i}a",
  title =        "{Adaptive-CoMPI}: Enhancing {MPI}-Based Applications'
                 Performance and Scalability by using Adaptive
                 Compression",
  journal =      j-IJHPCA,
  volume =       "25",
  number =       "1",
  pages =        "93--114",
  month =        feb,
  year =         "2011",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342010373486",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Tue Sep 6 15:14:36 MDT 2011",
  bibsource =    "http://hpc.sagepub.com/content/25/1.toc;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://hpc.sagepub.com/content/25/1/93.full.pdf+html",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
  onlinedate =   "July 26, 2010",
}
@Article{Fousek:2011:AFC,
  author =       "Jan Fousek and Ji{\v{r}}{\'\i} Filipovi{\v{c}} and
                 Mat{\'u}{\v{s}} Madzin",
  title =        "Automatic fusions of {CUDA--GPU} kernels for parallel
                 map",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "39",
  number =       "4",
  pages =        "98--99",
  month =        sep,
  year =         "2011",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2082156.2082183",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Tue Dec 20 17:53:58 MST 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "When implementing a function mapping on the
                 contemporary GPU, several contradictory performance
                 factors affecting distribution of computation into GPU
                 kernels have to be balanced. A decomposition-fusion
                 scheme suggests to decompose the computational problem
                 to be solved by several simple functions implemented as
                 standalone kernels and to fuse some of these functions
                 later into more complex kernels to improve memory
                 locality. In this paper, a prototype of
                 source-to-source compiler automating the fusion phase
                 is presented and the impact of fusions generated by the
                 compiler as well as compiler efficiency is
                 experimentally evaluated.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
}
@Article{Garcia:2011:KRR,
  author =       "Saturnino Garcia and Donghwan Jeon and Christopher M.
                 Louie and Michael Bedford Taylor",
  title =        "{Kremlin}: rethinking and rebooting {{\tt gprof}} for
                 the multicore age",
  journal =      j-SIGPLAN,
  volume =       "46",
  number =       "6",
  pages =        "458--469",
  month =        jun,
  year =         "2011",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/1993316.1993553",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Thu Jun 9 10:23:33 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Many recent parallelization tools lower the barrier
                 for parallelizing a program, but overlook one of the
                 first questions that a programmer needs to answer:
                 which parts of the program should I spend time
                 parallelizing?\par
                 This paper examines Kremlin, an automatic tool that,
                 given a serial version of a program, will make
                 recommendations to the user as to what regions (e.g.
                 loops or functions) of the program to attack first.
                 Kremlin introduces a novel hierarchical critical path
                 analysis and develops a new metric for estimating the
                 potential of parallelizing a region: self-parallelism.
                 We further introduce the concept of a parallelism
                 planner, which provides a ranked order of specific
                 regions to the programmer that are likely to have the
                 largest performance impact when parallelized. Kremlin
                 supports multiple planner personalities, which allow
                 the planner to more effectively target a particular
                 programming environment or class of machine.\par
                 We demonstrate the effectiveness of one such
                 personality, an OpenMP planner, by comparing versions
                 of programs that are parallelized according to
                 Kremlin's plan against third-party manually
                 parallelized versions. The results show that Kremlin's
                 OpenMP planner is highly effective, producing plans
                 whose performance is typically comparable to, and
                 sometimes much better than, manual parallelization. At
                 the same time, these plans would require that the user
                 parallelize significantly fewer regions of the
                 program.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Gopalakrishnan:2011:FAM,
  author =       "Ganesh Gopalakrishnan and Robert M. Kirby and Stephen
                 Siegel and Rajeev Thakur and William Gropp and Ewing
                 Lusk and Bronis R. de Supinski and Martin Schulz and
                 Greg Bronevetsky",
  title =        "Formal analysis of {MPI}-based parallel programs",
  journal =      j-CACM,
  volume =       "54",
  number =       "12",
  pages =        "82--91",
  month =        dec,
  year =         "2011",
  CODEN =        "CACMA2",
  DOI =          "https://doi.org/10.1145/2043174.2043194",
  ISSN =         "0001-0782 (print), 1557-7317 (electronic)",
  ISSN-L =       "0001-0782",
  bibdate =      "Tue Nov 29 11:53:53 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/cacm/;
                 http://www.math.utah.edu/pub/tex/bib/cacm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Communications of the ACM",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J79",
}
@Book{Hager:2011:IHP,
  author =       "Georg Hager and Gerhard Wellein",
  title =        "Introduction to high performance computing for
                 scientists and engineers",
  volume =       "7",
  publisher =    pub-CRC,
  address =      pub-CRC:adr,
  pages =        "xxv + 330 + 4",
  year =         "2011",
  ISBN =         "1-4398-1192-X",
  ISBN-13 =      "978-1-4398-1192-4",
  LCCN =         "QA76.88 .H34 2011",
  bibdate =      "Wed Sep 15 13:26:35 MDT 2010",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 z3950.loc.gov:7090/Voyager",
  series =       "Chapman and Hall/CRC computational science series",
  acknowledgement = ack-nhfb,
  subject =      "high performance computing",
  tableofcontents = "Modern Processors \\
                 Stored-program computer architecture \\
                 General-purpose cache-based microprocessor architecture
                 \\
                 Memory hierarchies \\
                 Multicore processors \\
                 Multithreaded processors \\
                 Vector processors \\
                 \\
                 Basic Optimization Techniques for Serial Code \\
                 Scalar profiling \\
                 Common sense optimizations \\
                 Simple measures, large impact \\
                 The role of compilers \\
                 C++ optimizations \\
                 \\
                 Data Access Optimization \\
                 Balance analysis and lightspeed estimates \\
                 Storage order \\
                 Case study: The Jacobi algorithm \\
                 Case study: Dense matrix transpose \\
                 Algorithm classification and access optimizations \\
                 Case study: Sparse matrix-vector multiply \\
                 \\
                 Parallel Computers \\
                 Taxonomy of parallel computing paradigms \\
                 Shared-memory computers \\
                 Distributed-memory computers \\
                 Hierarchical (hybrid) systems \\
                 Networks \\
                 \\
                 Basics of Parallelization \\
                 Why parallelize? \\
                 Parallelism \\
                 Parallel scalability \\
                 \\
                 Shared-Memory Parallel Programming with OpenMP \\
                 Short introduction to OpenMP \\
                 Case study: OpenMP-parallel Jacobi algorithm \\
                 Advanced OpenMP: Wavefront parallelization \\
                 \\
                 Efficient OpenMP Programming \\
                 Profiling OpenMP programs \\
                 Performance pitfalls \\
                 Case study: Parallel sparse matrix-vector multiply \\
                 \\
                 Locality Optimizations on ccNUMA Architectures \\
                 Locality of access on ccNUMA \\
                 Case study: ccNUMA optimization of sparse MVM \\
                 Placement pitfalls \\
                 ccNUMA issues with C++ \\
                 \\
                 Distributed-Memory Parallel Programming with MPI \\
                 Message passing \\
                 A short introduction to MPI \\
                 Example: MPI parallelization of a Jacobi solver \\
                 \\
                 Efficient MPI Programming \\
                 MPI performance tools \\
                 Communication parameters \\
                 Synchronization, serialization, contention \\
                 Reducing communication overhead \\
                 Understanding intranode point-to-point communication
                 \\
                 \\
                 Hybrid Parallelization with MPI and OpenMP \\
                 Basic MPI/OpenMP programming models \\
                 MPI taxonomy of thread interoperability \\
                 Hybrid decomposition and mapping \\
                 Potential benefits and drawbacks of hybrid programming
                 \\
                 \\
                 Appendix A: Topology and Affinity in Multicore
                 Environments \\
                 Appendix B: Solutions to the Problems \\
                 \\
                 Bibliography \\
                 \\
                 Index",
}
@Article{Han:2011:HHL,
  author =       "Tianyi David Han and Tarek S. Abdelrahman",
  title =        "{hiCUDA}: High-Level {GPGPU} Programming",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "22",
  number =       "1",
  pages =        "78--90",
  month =        jan,
  year =         "2011",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2010.62",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Fri Feb 25 14:08:57 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Harvey:2011:STP,
  author =       "M. J. Harvey and G. {De Fabritiis}",
  title =        "{Swan}: a tool for porting {CUDA} programs to
                 {OpenCL}",
  journal =      j-COMP-PHYS-COMM,
  volume =       "182",
  number =       "4",
  pages =        "1093--1099",
  month =        apr,
  year =         "2011",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2010.12.052",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Sat Feb 11 10:10:57 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465511000117",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Hawick:2011:HSL,
  author =       "K. A. Hawick and D. P. Playne",
  title =        "Hypercubic storage layout and transforms in arbitrary
                 dimensions using {GPUs} and {CUDA}",
  journal =      j-CCPE,
  volume =       "23",
  number =       "10",
  pages =        "1027--1050",
  month =        jul,
  year =         "2011",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.1628",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Dec 5 10:08:56 MST 2011",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626;
                 http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "28 Aug 2010",
}
@Article{Hawick:2011:RLS,
  author =       "K. A. Hawick and A. Leist and D. P. Playne",
  title =        "Regular Lattice and Small-World Spin Model Simulations
                 Using {CUDA} and {GPUs}",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "39",
  number =       "2",
  pages =        "183--201",
  month =        apr,
  year =         "2011",
  CODEN =        "IJPPE5",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Tue Sep 6 21:08:27 MDT 2011",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=39&issue=2;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=39&issue=2&spage=183",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Hinde:2011:QMD,
  author =       "Robert J. Hinde",
  title =        "{QSATS}: {MPI}-driven quantum simulations of atomic
                 solids at zero temperature",
  journal =      j-COMP-PHYS-COMM,
  volume =       "182",
  number =       "11",
  pages =        "2339--2349",
  month =        nov,
  year =         "2011",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2011.04.024",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Sat Feb 11 10:11:00 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465511001615",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Hoefler:2011:SPT,
  author =       "Torsten Hoefler and Rolf Rabenseifner and Hubert
                 Ritzdorf and Bronis R. de Supinski and Rajeev Thakur
                 and Jesper Larsson Tr{\"a}ff",
  title =        "The scalable process topology interface of {MPI 2.2}",
  journal =      j-CCPE,
  volume =       "23",
  number =       "4",
  pages =        "293--310",
  day =          "25",
  month =        mar,
  year =         "2011",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.1643",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Dec 5 10:08:53 MST 2011",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626;
                 http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "28 Aug 2010",
}
@Article{Hong:2011:ACG,
  author =       "Sungpack Hong and Sang Kyun Kim and Tayo Oguntebi and
                 Kunle Olukotun",
  title =        "Accelerating {CUDA} graph algorithms at maximum warp",
  journal =      j-SIGPLAN,
  volume =       "46",
  number =       "8",
  pages =        "267--276",
  month =        aug,
  year =         "2011",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2038037.1941590",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Aug 26 14:04:45 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPoPP '11 Conference proceedings.",
  abstract =     "Graphs are powerful data representations favored in
                 many computational domains. Modern GPUs have recently
                 shown promising results in accelerating computationally
                 challenging graph problems but their performance
                 suffered heavily when the graph structure is highly
                 irregular, as most real-world graphs tend to be. In
                 this study, we first observe that the poor performance
                 is caused by work imbalance and is an artifact of a
                 discrepancy between the GPU programming model and the
                 underlying GPU architecture. We then propose a novel
                 virtual warp-centric programming method that exposes
                 the traits of underlying GPU architectures to users.
                 Our method significantly improves the performance of
                 applications with heavily imbalanced workloads, and
                 enables trade-offs between workload imbalance and ALU
                 underutilization for fine-tuning the performance. Our
                 evaluation reveals that our method exhibits up to 9x
                 speedup over previous GPU algorithms and 12x over
                 single thread CPU execution on irregular graphs. When
                 properly configured, it also yields up to 30\%
                 improvement over previous GPU algorithms on regular
                 graphs. In addition to performance gains on graph
                 algorithms, our programming method achieves 1.3x to
                 15.1x speedup on a set of GPU benchmark applications.
                 Our study also confirms that the performance gap
                 between GPUs and other multi-threaded CPU graph
                 implementations is primarily due to the large
                 difference in memory bandwidth.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@InProceedings{Houzeaux:2011:HMO,
  author =       "G. Houzeaux and M. V{\'a}zquez and X. S{\'a}ez and J.
                 M. Cela",
  title =        "Hybrid {MPI--OpenMP} performance in massively parallel
                 computational fluid dynamics",
  crossref =     "Tromeur-Dervout:2011:PCF",
  volume =       "74",
  pages =        "293--297",
  year =         "2011",
  DOI =          "https://doi.org/10.1007/978-3-642-14438-7_31",
  bibdate =      "Sat Dec 22 08:34:49 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncse.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/content/pdf/10.1007/978-3-642-14438-7_31",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-14438-7",
  book-URL =     "http://www.springerlink.com/content/978-3-642-14438-7",
}
@Article{Hussain:2011:PIA,
  author =       "Masroor Hussain and Muhammad Abid and Mushtaq Ahmad
                 and Ashfaq Khokhar and Arif Masud",
  title =        "A Parallel Implementation of {ALE} Moving Mesh
                 Technique for {FSI} Problems using {OpenMP}",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "39",
  number =       "6",
  pages =        "717--745",
  month =        dec,
  year =         "2011",
  CODEN =        "IJPPE5",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Tue Sep 6 21:08:54 MDT 2011",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=39&issue=6;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=39&issue=6&spage=717",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Jin:2011:HPC,
  author =       "Haoqiang Jin and Dennis Jespersen and Piyush Mehrotra
                 and Rupak Biswas and Lei Huang and Barbara Chapman",
  title =        "High performance computing using {MPI} and {OpenMP} on
                 multi-core parallel systems",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "37",
  number =       "9",
  pages =        "562--575",
  month =        sep,
  year =         "2011",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2011.02.002",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sat Feb 4 15:17:35 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819111000159",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Kalentev:2011:CCL,
  author =       "Oleksandr Kalentev and Abha Rai and Stefan Kemnitz and
                 Ralf Schneider",
  title =        "Connected component labeling on a {$2$D} grid using
                 {CUDA}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "71",
  number =       "4",
  pages =        "615--620",
  month =        apr,
  year =         "2011",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Feb 25 19:11:50 MST 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Kim:2011:ASC,
  author =       "Jungwon Kim and Honggyu Kim and Joo Hwan Lee and
                 Jaejin Lee",
  title =        "Achieving a single compute device image in {OpenCL}
                 for multiple {GPUs}",
  journal =      j-SIGPLAN,
  volume =       "46",
  number =       "8",
  pages =        "277--288",
  month =        aug,
  year =         "2011",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2038037.1941591",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Aug 26 14:04:45 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPoPP '11 Conference proceedings.",
  abstract =     "In this paper, we propose an OpenCL framework that
                 combines multiple GPUs and treats them as a single
                 compute device. Providing a single virtual compute
                 device image to the user makes an OpenCL application
                 written for a single GPU portable to the platform that
                 has multiple GPU devices. It also makes the application
                 exploit full computing power of the multiple GPU
                 devices and the total amount of GPU memories available
                 in the platform. Our OpenCL framework automatically
                 distributes at run-time the OpenCL kernel written for a
                 single GPU into multiple CUDA kernels that execute on
                 the multiple GPU devices. It applies a run-time memory
                 access range analysis to the kernel by performing a
                 sampling run and identifies an optimal workload
                 distribution for the kernel. To achieve a single
                 compute device image, the runtime maintains virtual
                 device memory that is allocated in the main memory. The
                 OpenCL runtime treats the memory as if it were the
                 memory of a single GPU device and keeps it consistent
                 to the memories of the multiple GPU devices. Our
                 OpenCL-C-to-C translator generates the sampling code
                 from the OpenCL kernel code and OpenCL-C-to-CUDA-C
                 translator generates the CUDA kernel code for the
                 distributed OpenCL kernel. We show the effectiveness of
                 our OpenCL framework by implementing the OpenCL runtime
                 and two source-to-source translators. We evaluate its
                 performance with a system that contains 8 GPUs using 11
                 OpenCL benchmark applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Kolonias:2011:DIE,
  author =       "Vasileios Kolonias and Artemios G. Voyiatzis and
                 George Goulas and Efthymios Housos",
  title =        "Design and implementation of an efficient integer
                 count sort in {CUDA GPUs}",
  journal =      j-CCPE,
  volume =       "23",
  number =       "18",
  pages =        "2365--2381",
  day =          "25",
  month =        dec,
  year =         "2011",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.1776",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Dec 5 10:09:01 MST 2011",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626;
                 http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "22 Jun 2011",
}
@Article{Li:2011:FSM,
  author =       "Guodong Li and Robert Palmer and Michael DeLisi and
                 Ganesh Gopalakrishnan and Robert M. Kirby",
  title =        "Formal specification of {MPI 2.0}: {Case} study in
                 specifying a practical concurrent programming {API}",
  journal =      j-SCI-COMPUT-PROGRAM,
  volume =       "76",
  number =       "2",
  pages =        "65--81",
  day =          "1",
  month =        feb,
  year =         "2011",
  CODEN =        "SCPGD4",
  ISSN =         "0167-6423 (print), 1872-7964 (electronic)",
  ISSN-L =       "0167-6423",
  bibdate =      "Fri Apr 1 18:39:40 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/01676423",
  acknowledgement = ack-nhfb,
  fjournal =     "Science of Computer Programming",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01676423",
}
@Article{Liao:2011:DEM,
  author =       "Wei-keng Liao",
  title =        "Design and Evaluation of {MPI} File Domain
                 Partitioning Methods under Extent-Based File Locking
                 Protocol",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "22",
  number =       "2",
  pages =        "260--272",
  month =        feb,
  year =         "2011",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2010.74",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Fri Feb 25 14:08:57 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Lim:2011:ATC,
  author =       "Min Yeol Lim and Vincent W. Freeh and David K.
                 Lowenthal",
  title =        "Adaptive, transparent {CPU} scaling algorithms
                 leveraging inter-node {MPI} communication regions",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "37",
  number =       "10--11",
  pages =        "667--683",
  month =        oct # "\slash " # nov,
  year =         "2011",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2011.07.001",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sat Feb 4 15:17:36 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819111000871",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Liu:2011:CBA,
  author =       "Weiguo Liu and Bertil Schmidt and Wolfgang
                 M{\"u}ller-Wittig",
  title =        "{CUDA-BLASTP}: Accelerating {BLASTP} on {CUDA}-Enabled
                 Graphics Hardware",
  journal =      j-TCBB,
  volume =       "8",
  number =       "6",
  pages =        "1678--1684",
  month =        nov,
  year =         "2011",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2011.33",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sun Nov 6 06:45:50 MST 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}
@Article{Michailidis:2011:PDM,
  author =       "Panagiotis D. Michailidis and Konstantinos G.
                 Margaritis",
  title =        "Parallel direct methods for solving the system of
                 linear equations with pipelining on a multicore using
                 {OpenMP}",
  journal =      j-J-COMPUT-APPL-MATH,
  volume =       "236",
  number =       "3",
  pages =        "326--341",
  day =          "1",
  month =        sep,
  year =         "2011",
  CODEN =        "JCAMDI",
  ISSN =         "0377-0427 (print), 1879-1778 (electronic)",
  ISSN-L =       "0377-0427",
  bibdate =      "Sat Feb 25 13:24:37 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jcomputapplmath2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0377042711004183",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational and Applied Mathematics",
  journal-URL =  "http://www.sciencedirect.com/science/journal/03770427",
}
@Article{Mininni:2011:HMO,
  author =       "Pablo D. Mininni and Duane Rosenberg and Raghu Reddy
                 and Annick Pouquet",
  title =        "A hybrid {MPI--OpenMP} scheme for scalable parallel
                 pseudospectral computations for fluid turbulence",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "37",
  number =       "6--7",
  pages =        "316--326",
  month =        jun # "\slash " # jul,
  year =         "2011",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2011.05.004",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sat Feb 4 15:17:35 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819111000512",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Mokbel:2011:ASR,
  author =       "Mohammed F. Mokbel and Robert D. Kent and Michael
                 Wong",
  title =        "An Abstract Semantically Rich Compiler Collocative and
                 Interpretative Model for {OpenMP} Programs",
  journal =      j-COMP-J,
  volume =       "54",
  number =       "8",
  pages =        "1325--1343",
  month =        aug,
  year =         "2011",
  CODEN =        "CMPJA6",
  DOI =          "https://doi.org/10.1093/comjnl/bxr029",
  ISSN =         "0010-4620 (print), 1460-2067 (electronic)",
  ISSN-L =       "0010-4620",
  bibdate =      "Wed Aug 17 16:34:11 MDT 2011",
  bibsource =    "http://comjnl.oxfordjournals.org/content/54/8.toc;
                 http://www.math.utah.edu/pub/tex/bib/compj2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://comjnl.oxfordjournals.org/content/54/8/1325.full.pdf+html",
  acknowledgement = ack-nhfb,
  fjournal =     "The Computer Journal",
  journal-URL =  "http://comjnl.oxfordjournals.org/",
  onlinedate =   "April 5, 2011",
}
@Article{Pennycook:2011:PAH,
  author =       "S. J. Pennycook and S. D. Hammond and S. A. Jarvis and
                 G. R. Mudalige",
  title =        "Performance analysis of a hybrid {MPI\slash CUDA}
                 implementation of the {NASLU} benchmark",
  journal =      j-SIGMETRICS,
  volume =       "38",
  number =       "4",
  pages =        "23--29",
  month =        mar,
  year =         "2011",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/1964218.1964223",
  ISSN =         "0163-5999 (print), 1557-9484 (electronic)",
  ISSN-L =       "0163-5999",
  bibdate =      "Fri Apr 1 23:02:55 MDT 2011",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGMETRICS Performance Evaluation Review",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J618",
  remark =       "Special issue on the 1st international workshop on
                 performance modeling, benchmarking and simulation of
                 high performance computing systems (PMBS 10).",
}
@Article{Peters:2011:FPC,
  author =       "Hagen Peters and Ole Schulz-Hildebrandt and Norbert
                 Luttenberger",
  title =        "Fast in-place, comparison-based sorting with {CUDA}: a
                 study with bitonic sort",
  journal =      j-CCPE,
  volume =       "23",
  number =       "7",
  pages =        "681--693",
  month =        may,
  year =         "2011",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.1686",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Dec 5 10:08:55 MST 2011",
  bibsource =    "http://www.interscience.wiley.com/jpages/1532-0626;
                 http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "14 Jan 2011",
}
@Article{Plimpton:2011:MML,
  author =       "Steven J. Plimpton and Karen D. Devine",
  title =        "{MapReduce} in {MPI} for large-scale graph
                 algorithms",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "37",
  number =       "9",
  pages =        "610--632",
  month =        sep,
  year =         "2011",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2011.02.004",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sat Feb 4 15:17:35 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819111000172",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Siegel:2011:AFV,
  author =       "Stephen F. Siegel and Timothy K. Zirkel",
  title =        "Automatic formal verification of {MPI}-based parallel
                 programs",
  journal =      j-SIGPLAN,
  volume =       "46",
  number =       "8",
  pages =        "309--310",
  month =        aug,
  year =         "2011",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2038037.1941603",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Aug 26 14:04:45 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPoPP '11 Conference proceedings.",
  abstract =     "The Toolkit for Accurate Scientific Software (TASS) is
                 a suite of tools for the formal verification of
                 MPI-based parallel programs used in computational
                 science. TASS can verify various safety properties as
                 well as compare two programs for functional
                 equivalence. The TASS front end takes an integer $ n
                 \geq 1 $ and a C/MPI program, and constructs an
                 abstract model of the program with $n$ processes.
                 Procedures, structs, (multi-dimensional) arrays,
                 heap-allocated data, pointers, and pointer arithmetic
                 are all representable in a TASS model. The model is
                 then explored using symbolic execution and explicit
                 state space enumeration. A number of techniques are
                 used to reduce the time and memory consumed. A variety
                 of realistic MPI programs have been verified with TASS,
                 including Jacobi iteration and manager-worker type
                 programs, and some subtle defects have been discovered.
                 TASS is written in Java and is available from
                 \path=http://vsl.cis.udel.edu/tass= under the Gnu
                 Public License.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Sintorn:2011:EAF,
  author =       "Erik Sintorn and Ola Olsson and Ulf Assarsson",
  title =        "An efficient alias-free shadow algorithm for opaque
                 and transparent objects using per-triangle shadow
                 volumes",
  journal =      j-TOG,
  volume =       "30",
  number =       "6",
  pages =        "153:1--153:??",
  month =        dec,
  year =         "2011",
  CODEN =        "ATGRDF",
  DOI =          "https://doi.org/10.1145/2070781.2024187",
  ISSN =         "0730-0301 (print), 1557-7368 (electronic)",
  ISSN-L =       "0730-0301",
  bibdate =      "Mon Dec 19 15:59:18 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tog/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tog.bib",
  abstract =     "This paper presents a novel method for generating
                 pixel-accurate shadows from point light-sources in
                 real-time. The new method is able to quickly cull
                 pixels that are not in shadow and to trivially accept
                 large chunks of pixels thanks mainly to using the whole
                 triangle shadow volume as a primitive, instead of
                 rendering the shadow quads independently as in the
                 classic Shadow-Volume algorithm. Our CUDA
                 implementation outperforms z-fail consistently and
                 surpasses z-pass at high resolutions, although these
                 latter two are hardware accelerated, while inheriting
                 none of the robustness issues associated with these
                 methods. Another, perhaps even more important property
                 of our algorithm, is that it requires no pre-processing
                 or identification of silhouette edges and so robustly
                 and efficiently handles arbitrary triangle soups.",
  acknowledgement = ack-nhfb,
  articleno =    "153",
  fjournal =     "ACM Transactions on Graphics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J778",
}
@InProceedings{Smelyanskiy:2011:HPL,
  author = {Mikhail Smelyanskiy and Karthikeyan Vaidyanathan and
    Jee Choi and B{\'a}lint Jo{\'o} and Jatin Chhugani and
    Michael A. Clark and Pradeep Dubey},
  title = {High-performance lattice {QCD} for multi-core based
    parallel systems using a cache-friendly hybrid
    threaded-{MPI} approach},
  crossref = {Lathrop:2011:SPI},
  pages = {69:1--69:11},
  year = {2011},
  DOI = {https://doi.org/10.1145/2063384.2063477},
  bibdate = {Fri Dec 16 11:05:47 MST 2011},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/supercomputing2011.bib},
  acknowledgement = ack-nhfb,
  articleno = {69},
}
%% Journal article (J. Comput. Appl. Math. 236(3), 2011) on solving ODE
%% boundary-value problems on the Fermi CUDA architecture.  The first
%% author's given name is written with the Polish l-stroke ({\l}), in line
%% with the accented surname in the same field.
@Article{Stpiczynski:2011:SKB,
author = "Przemys{\l}aw Stpiczy{\'n}ski and Joanna Potiopa",
title = "Solving a kind of boundary-value problem for ordinary
differential equations using {Fermi} --- The next
generation {CUDA} computing architecture",
journal = j-J-COMPUT-APPL-MATH,
volume = "236",
number = "3",
pages = "384--393",
day = "1",
month = sep,
year = "2011",
CODEN = "JCAMDI",
ISSN = "0377-0427 (print), 1879-1778 (electronic)",
ISSN-L = "0377-0427",
bibdate = "Sat Feb 25 13:24:37 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jcomputapplmath2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0377042711004237",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computational and Applied Mathematics",
journal-URL = "http://www.sciencedirect.com/science/journal/03770427",
}
@Article{Szalay:2011:FCD,
  author = {Zs{\'o}fia Szalay and J{\'a}nos Rohonczy},
  title = {Fast calculation of {DNMR} spectra on {CUDA}-enabled
    graphics card},
  journal = j-J-COMPUT-CHEM,
  volume = {32},
  number = {7},
  pages = {1262--1270},
  month = may,
  year = {2011},
  CODEN = {JCCHDD},
  DOI = {https://doi.org/10.1002/jcc.21706},
  ISSN = {0192-8651 (print), 1096-987X (electronic)},
  ISSN-L = {0192-8651},
  bibdate = {Thu Nov 29 14:55:32 MST 2012},
  bibsource = {http://www.interscience.wiley.com/jpages/0192-8651;
    http://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Computational Chemistry},
  journal-URL = {http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X},
  onlinedate = {29 Nov 2010},
}
@Article{vanderLaan:2011:AWL,
  author = {Wladimir J. van der Laan and Andrei C. Jalba and Jos
    B. T. M. Roerdink},
  title = {Accelerating Wavelet Lifting on Graphics Hardware
    Using {CUDA}},
  journal = j-IEEE-TRANS-PAR-DIST-SYS,
  volume = {22},
  number = {1},
  pages = {132--146},
  month = jan,
  year = {2011},
  CODEN = {ITDSEO},
  DOI = {https://doi.org/10.1109/TPDS.2010.143},
  ISSN = {1045-9219 (print), 1558-2183 (electronic)},
  ISSN-L = {1045-9219},
  bibdate = {Fri Feb 25 14:08:57 2011},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE Transactions on Parallel and Distributed
    Systems},
  journal-URL = {http://www.computer.org/tpds/archives.htm},
}
@Article{Wittenbrink:2011:FGG,
  author = {Craig M. Wittenbrink and Emmett Kilgariff and Arjun
    Prabhu},
  title = {{Fermi GF100 GPU} Architecture},
  journal = j-IEEE-MICRO,
  volume = {31},
  number = {2},
  pages = {50--59},
  month = mar # "\slash " # apr,
  year = {2011},
  CODEN = {IEMIDZ},
  DOI = {https://doi.org/10.1109/MM.2011.24},
  ISSN = {0272-1732 (print), 1937-4143 (electronic)},
  ISSN-L = {0272-1732},
  bibdate = {Tue Apr 26 13:50:28 2011},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/hot-chips.bib;
    http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract = {The Fermi GF100 is a GPU architecture that provides
    several new capabilities beyond the Nvidia GT200 or
    Tesla architecture. The Fermi architecture offers up to
    512 CUDA cores and special features for gaming and
    high-performance computing. This article describes the
    GPU's new capabilities for tessellation, physics
    processing, and computational graphics.},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE Micro},
  journal-URL = {http://www.computer.org/csdl/mags/mi/index.html},
  keywords = {Hot Chips 22 conference proceedings},
}
@Article{Wong:2011:EMS,
  author = {Hon-Cheng Wong and Un-Hong Wong and Xueshang Feng and
    Zesheng Tang},
  title = {Efficient magnetohydrodynamic simulations on graphics
    processing units with {CUDA}},
  journal = j-COMP-PHYS-COMM,
  volume = {182},
  number = {10},
  pages = {2132--2160},
  month = oct,
  year = {2011},
  CODEN = {CPHCBZ},
  DOI = {https://doi.org/10.1016/j.cpc.2011.05.011},
  ISSN = {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L = {0010-4655},
  bibdate = {Sat Feb 11 10:11:00 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0010465511001676},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-URL = {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Wu:2011:PCH,
  author = {Xingfu Wu and Valerie Taylor},
  title = {Performance characteristics of hybrid {MPI\slash
    OpenMP} implementations of {NAS} parallel benchmarks
    {SP} and {BT} on large-scale multicore supercomputers},
  journal = j-SIGMETRICS,
  volume = {38},
  number = {4},
  pages = {56--62},
  month = mar,
  year = {2011},
  CODEN = {????},
  DOI = {https://doi.org/10.1145/1964218.1964228},
  ISSN = {0163-5999 (print), 1557-9484 (electronic)},
  ISSN-L = {0163-5999},
  bibdate = {Fri Apr 1 23:02:55 MDT 2011},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGMETRICS Performance Evaluation Review},
  journal-URL = {http://portal.acm.org/toc.cfm?id=J618},
  remark = {Special issue on the 1st international workshop on
    performance modeling, benchmarking and simulation of
    high performance computing systems (PMBS 10).},
}
@Article{Yang:2011:HCO,
  author = {Chao-Tung Yang and Chih-Lin Huang and Cheng-Fang Lin},
  title = {Hybrid {CUDA}, {OpenMP}, and {MPI} parallel
    programming on multicore {GPU} clusters},
  journal = j-COMP-PHYS-COMM,
  volume = {182},
  number = {1},
  pages = {266--269},
  month = jan,
  year = {2011},
  CODEN = {CPHCBZ},
  DOI = {https://doi.org/10.1016/j.cpc.2010.06.035},
  ISSN = {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L = {0010-4655},
  bibdate = {Sat Feb 11 10:10:55 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0010465510002262},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-URL = {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Yang:2011:PBP,
  author = {Chao-Tung Yang and Chao-Chin Wu and Jen-Hsiang Chang},
  title = {Performance-based parallel loop self-scheduling using
    hybrid {OpenMP} and {MPI} programming on multicore
    {SMP} clusters},
  journal = j-CCPE,
  volume = {23},
  number = {8},
  pages = {721--744},
  day = {10},
  month = jun,
  year = {2011},
  CODEN = {CCPEBO},
  DOI = {https://doi.org/10.1002/cpe.1627},
  ISSN = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L = {1532-0626},
  bibdate = {Mon Dec 5 10:08:55 MST 2011},
  bibsource = {http://www.interscience.wiley.com/jpages/1532-0626;
    http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Practice and Experience},
  journal-URL = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate = {26 Sep 2010},
}
@Article{Yilmaz:2011:RMS,
  author = {Erdal Yilmaz and Eray Molla and Cansin Yildiz and
    Veysi Isler},
  title = {Realistic modeling of spectator behavior for soccer
    videogames with {CUDA}},
  journal = j-COMPUTERS-AND-GRAPHICS,
  volume = {35},
  number = {6},
  pages = {1063--1069},
  month = dec,
  year = {2011},
  CODEN = {COGRD2},
  DOI = {https://doi.org/10.1016/j.cag.2011.10.001},
  ISSN = {0097-8493 (print), 1873-7684 (electronic)},
  ISSN-L = {0097-8493},
  bibdate = {Mon Feb 13 16:42:03 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compgraph.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.sciencedirect.com/science/journal/00978493},
  URL = {http://www.sciencedirect.com/science/article/pii/S0097849311001476},
  acknowledgement = ack-nhfb,
  fjournal = {Computers \& Graphics},
  journal-URL = {http://www.sciencedirect.com/science/journal/00978493},
}
@InProceedings{Zhai:2011:CVH,
  author = {Yan Zhai and Mingliang Liu and Jidong Zhai and
    Xiaosong Ma and Wenguang Chen},
  title = {Cloud versus in-house cluster: evaluating {Amazon}
    cluster compute instances for running {MPI}
    applications},
  crossref = {ACM:2011:SSP},
  pages = {11:1--11:10},
  year = {2011},
  DOI = {https://doi.org/10.1145/2063348.2063363},
  bibdate = {Fri Dec 16 11:19:26 MST 2011},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/supercomputing2011.bib},
  acknowledgement = ack-nhfb,
  articleno = {11},
  keywords = {Amazon EC2 CCI; IB cluster (InfiniBand)},
}
%% SIGPLAN Notices 46(8) article on GRace, a data-race detector for GPU
%% programs.  In the abstract the compared tool is spelled B-tool throughout
%% and numeric ranges are typeset with en-dashes (--).
@Article{Zheng:2011:GLO,
author = "Mai Zheng and Vignesh T. Ravi and Feng Qin and Gagan
Agrawal",
title = "{GRace}: a low-overhead mechanism for detecting data
races in {GPU} programs",
journal = j-SIGPLAN,
volume = "46",
number = "8",
pages = "135--146",
month = aug,
year = "2011",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2038037.1941574",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 14:04:45 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '11 Conference proceedings.",
abstract = "In recent years, GPUs have emerged as an extremely
cost-effective means for achieving high performance.
Many application developers, including those with no
prior parallel programming experience, are now trying
to scale their applications using GPUs. While languages
like CUDA and OpenCL have eased GPU programming for
non-graphical applications, they are still explicitly
parallel languages. All parallel programmers,
particularly the novices, need tools that can help
ensuring the correctness of their programs. Like any
multithreaded environment, data races on GPUs can
severely affect the program reliability. Thus, tool
support for detecting race conditions can significantly
benefit GPU application developers. Existing approaches
for detecting data races on CPUs or GPUs have one or
more of the following limitations: (1) being ill-suited
for handling non-lock synchronization primitives on
GPUs; (2) lacking of scalability due to the state
explosion problem; (3) reporting many false positives
because of simplified modeling; and/or (4) incurring
prohibitive runtime and space overhead. In this paper,
we propose GRace, a new mechanism for detecting races
in GPU programs that combines static analysis with a
carefully designed dynamic checker for logging and
analyzing information at runtime. Our design utilizes
GPUs memory hierarchy to log runtime data accesses
efficiently. To improve the performance, GRace
leverages static analysis to reduce the number of
statements that need to be instrumented. Additionally,
by exploiting the knowledge of thread scheduling and
the execution model in the underlying GPUs, GRace can
accurately detect data races with no false positives
reported. Based on the above idea, we have built a
prototype of GRace with two schemes, i.e., GRace-stmt
and GRace-addr, for NVIDIA GPUs. Both schemes are
integrated with the same static analysis. We have
evaluated GRace-stmt and GRace-addr with three data
race bugs in three GPU kernel functions and also have
compared them with the existing approach, referred to
as B-tool. Our experimental results show that both
schemes of GRace are effective in detecting all
evaluated cases with no false positives, whereas B-tool
reports many false positives for one evaluated case. On
the one hand, GRace-addr incurs low runtime overhead,
i.e., 22--116\%, and low space overhead, i.e., 9--18MB,
for the evaluated kernels. On the other hand,
GRace-stmt offers more help in diagnosing data races
with larger overhead.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Agathos:2012:TBE,
  author = {Spiros N. Agathos and Panagiotis E. Hadjidoukas and
    Vassilios V. Dimakopoulos},
  title = {Task-Based Execution of Nested {OpenMP} Loops},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {7312},
  pages = {210--222},
  year = {2012},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-642-30961-8_16},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Wed Dec 19 15:19:49 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer.com/chapter/10.1007/978-3-642-30961-8_16/},
  acknowledgement = ack-nhfb,
  book-DOI = {https://doi.org/10.1007/978-3-642-30961-8},
  book-URL = {http://www.springerlink.com/content/978-3-642-30961-8},
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Amritkar:2012:OPF,
  author = {Amit Amritkar and Danesh Tafti and Rui Liu and Rick
    Kufrin and Barbara Chapman},
  title = {{OpenMP} parallelism for fluid and fluid-particulate
    systems},
  journal = j-PARALLEL-COMPUTING,
  volume = {38},
  number = {9},
  pages = {501--517},
  month = sep,
  year = {2012},
  CODEN = {PACOEJ},
  DOI = {https://doi.org/10.1016/j.parco.2012.05.005},
  ISSN = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L = {0167-8191},
  bibdate = {Mon Jul 30 14:28:54 MDT 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0167819112000476},
  acknowledgement = ack-nhfb,
  fjournal = {Parallel Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/01678191},
}
%% SIGPLAN Notices 47(8) short paper evaluating OpenMP Superscalar (OmpSs).
%% The note spells the conference acronym PPoPP (Principles and Practice of
%% Parallel Programming).
@Article{Andersch:2012:PPE,
author = "Michael Andersch and Chi Ching Chi and Ben Juurlink",
title = "Programming parallel embedded and consumer
applications in {OpenMP} superscalar",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "281--282",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145854",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '12 conference proceedings.",
abstract = "In this paper, we evaluate the performance and
usability of the parallel programming model OpenMP
Superscalar (OmpSs), apply it to 10 different
benchmarks and compare its performance with
corresponding POSIX threads implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Misc{Anonymous:2012:CTC,
  author = {Anonymous},
  title = {{CUDA Toolkit 5.0 CURAND} Guide},
  howpublished = {Web document},
  year = {2012},
  bibdate = {Sat Feb 08 18:16:05 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/prng.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://docs.nvidia.com/cuda/pdf/CURAND_Library.pdf},
  acknowledgement = ack-nhfb,
  keywords = {random-number generator},
}
%% SIGPLAN Notices 47(8) short paper on communication optimizations in the
%% R-Stream compiler.  The note spells the conference acronym PPoPP
%% (Principles and Practice of Parallel Programming).
@Article{Baskaran:2012:ACO,
author = "Muthu Manikandan Baskaran and Nicolas Vasilache and
Benoit Meister and Richard Lethin",
title = "Automatic communication optimizations through memory
reuse strategies",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "277--278",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145852",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '12 conference proceedings.",
abstract = "Modern parallel architectures are emerging with
sophisticated hardware consisting of hierarchically
placed parallel processors and memories. The properties
of memories in a system vary wildly, not only
quantitatively (size, latency, bandwidth, number of
banks) but also qualitatively (scratchpad, cache).
Along with the emergence of such architectures comes
the need for effectively utilizing the parallel
processors and properly managing data movement across
memories to improve memory bandwidth and hide data
transfer latency. In this paper, we describe some of
the high-level optimizations that are targeted at the
improvement of memory performance in the R-Stream
compiler, a high-level source-to-source automatic
parallelizing compiler. We direct our focus in this
paper on optimizing communications (data transfers) by
improving memory reuse at various levels of an explicit
memory hierarchy. This general concept is well-suited
to the hardware properties of GPGPUs, which is the
architecture that we concentrate on for this paper. We
apply our techniques and obtain performance improvement
on various stencil kernels including an important
iterative stencil kernel in seismic processing
applications where the performance is comparable to
that of the state-of-the-art implementation of the
kernel by a CUDA expert.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Bawidamann:2012:ETO,
  author = {Uwe Bawidamann and Marco Nehmeier},
  title = {Expression Templates and {OpenCL}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {7204},
  pages = {71--80},
  year = {2012},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-642-31500-8_8},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Wed Dec 19 15:26:14 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012c.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer.com/chapter/10.1007/978-3-642-31500-8_8/},
  acknowledgement = ack-nhfb,
  book-DOI = {https://doi.org/10.1007/978-3-642-31500-8},
  book-URL = {http://www.springerlink.com/content/978-3-642-31500-8},
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Berg:2012:FCL,
  author = {Bernd A. Berg and Hao Wu},
  title = {{Fortran} code for {$ {\rm SU}(3) $} lattice gauge
    theory with and without {MPI} checkerboard
    parallelization},
  journal = j-COMP-PHYS-COMM,
  volume = {183},
  number = {10},
  pages = {2145--2157},
  month = oct,
  year = {2012},
  CODEN = {CPHCBZ},
  DOI = {https://doi.org/10.1016/j.cpc.2012.03.021},
  ISSN = {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L = {0010-4655},
  bibdate = {Thu Jun 28 15:53:26 MDT 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0010465512001269},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-URL = {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Bergstrom:2012:NDP,
  author = {Lars Bergstrom and John Reppy},
  title = {Nested data-parallelism on the {GPU}},
  journal = j-SIGPLAN,
  volume = {47},
  number = {9},
  pages = {247--258},
  month = sep,
  year = {2012},
  CODEN = {SINODQ},
  DOI = {https://doi.org/10.1145/2398856.2364563},
  ISSN = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  ISSN-L = {0362-1340},
  bibdate = {Thu Nov 15 16:40:19 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract = {Graphics processing units (GPUs) provide both memory
    bandwidth and arithmetic performance far greater than
    that available on CPUs but, because of their
    Single-Instruction-Multiple-Data (SIMD) architecture,
    they are hard to program. Most of the programs ported
    to GPUs thus far use traditional data-level
    parallelism, performing only operations that operate
    uniformly over vectors. NESL is a first-order
    functional language that was designed to allow
    programmers to write irregular-parallel programs ---
    such as parallel divide-and-conquer algorithms --- for
    wide-vector parallel computers. This paper presents our
    port of the NESL implementation to work on GPUs and
    provides empirical evidence that nested
    data-parallelism (NDP) on GPUs significantly
    outperforms CPU-based implementations and matches or
    beats newer GPU languages that support only flat
    parallelism. While our performance does not match that
    of hand-tuned CUDA programs, we argue that the
    notational conciseness of NESL is worth the loss in
    performance. This work provides the first language
    implementation that directly supports NDP on a GPU.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGPLAN Notices},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark = {ICFP '12 conference proceedings.},
}
@Article{Berka:2012:PET,
  author = {Tobias Berka and Helge Hagenauer and Marian
    Vajter{\v{s}}ic},
  title = {Portable Explicit Threading and Concurrent Programming
    for {MPI} Applications},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {7204},
  pages = {81--90},
  year = {2012},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-642-31500-8_9},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Wed Dec 19 15:26:14 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012c.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer.com/chapter/10.1007/978-3-642-31500-8_9/},
  acknowledgement = ack-nhfb,
  book-DOI = {https://doi.org/10.1007/978-3-642-31500-8},
  book-URL = {http://www.springerlink.com/content/978-3-642-31500-8},
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Betts:2012:GVG,
  author = {Adam Betts and Nathan Chong and Alastair Donaldson and
    Shaz Qadeer and Paul Thomson},
  title = {{GPUVerify}: a verifier for {GPU} kernels},
  journal = j-SIGPLAN,
  volume = {47},
  number = {10},
  pages = {113--132},
  month = oct,
  year = {2012},
  CODEN = {SINODQ},
  DOI = {https://doi.org/10.1145/2398857.2384625},
  ISSN = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  ISSN-L = {0362-1340},
  bibdate = {Thu Nov 15 16:40:23 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract = {We present a technique for verifying race- and
    divergence-freedom of GPU kernels that are written in
    mainstream kernel programming languages such as OpenCL
    and CUDA. Our approach is founded on a novel formal
    operational semantics for GPU programming termed
    synchronous, delayed visibility (SDV) semantics. The
    SDV semantics provides a precise definition of barrier
    divergence in GPU kernels and allows kernel
    verification to be reduced to analysis of a sequential
    program, thereby completely avoiding the need to reason
    about thread interleavings, and allowing existing
    modular techniques for program verification to be
    leveraged. We describe an efficient encoding for data
    race detection and propose a method for automatically
    inferring loop invariants required for verification. We
    have implemented these techniques as a practical
    verification tool, GPUVerify, which can be applied
    directly to OpenCL and CUDA source code. We evaluate
    GPUVerify with respect to a set of 163 kernels drawn
    from public and commercial sources. Our evaluation
    demonstrates that GPUVerify is capable of efficient,
    automatic verification of a large number of real-world
    kernels.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGPLAN Notices},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark = {OOPSLA '12 conference proceedings.},
}
@Article{Bihari:2012:CIT,
  author = {Barna L. Bihari and Michael Wong and Amy Wang and
    Bronis R. de Supinski and Wang Chen},
  title = {A Case for Including Transactions in {OpenMP} {II}:
    Hardware Transactional Memory},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {7312},
  pages = {44--58},
  year = {2012},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-642-30961-8_4},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Wed Dec 19 15:19:49 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer.com/chapter/10.1007/978-3-642-30961-8_4/},
  acknowledgement = ack-nhfb,
  book-DOI = {https://doi.org/10.1007/978-3-642-30961-8},
  book-URL = {http://www.springerlink.com/content/978-3-642-30961-8},
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Blattner:2012:PSC,
  author = {Timothy Blattner and Shiming Yang},
  title = {Performance study on {CUDA GPUs} for parallelizing the
    local ensemble transformed {Kalman} filter algorithm},
  journal = j-CCPE,
  volume = {24},
  number = {2},
  pages = {167--177},
  month = feb,
  year = {2012},
  CODEN = {CCPEBO},
  DOI = {https://doi.org/10.1002/cpe.1859},
  ISSN = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L = {1532-0626},
  bibdate = {Wed Apr 4 09:18:00 MDT 2012},
  bibsource = {http://www.interscience.wiley.com/jpages/1532-0626;
    http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Practice and Experience},
  journal-URL = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate = {17 Oct 2011},
}
@Article{Broquedis:2012:LEO,
  author = {Fran{\c{c}}ois Broquedis and Thierry Gautier and
    Vincent Danjean},
  title = {{libOMP}, an Efficient {OpenMP} Runtime System for
    Both Fork-Join and Data Flow Paradigms},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {7312},
  pages = {102--115},
  year = {2012},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-642-30961-8_8},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Wed Dec 19 15:19:49 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer.com/chapter/10.1007/978-3-642-30961-8_8/},
  acknowledgement = ack-nhfb,
  book-DOI = {https://doi.org/10.1007/978-3-642-30961-8},
  book-URL = {http://www.springerlink.com/content/978-3-642-30961-8},
  fjournal = {Lecture Notes in Computer Science},
}
%% One-page LNCS 7490 contribution.  The URL field uses the same
%% link.springer.com/chapter/<DOI>/ form as the other LNCS entries in this
%% file instead of the publisher's access-page redirect address.
@Article{Bruning:2012:MFT,
author = "Ulrich Br{\"u}ning",
title = "{MPI} Functions and Their Impact on Interconnect
Hardware",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7490",
pages = "10--10",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-33518-1_2",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:23:42 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_2/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-33518-1",
book-URL = "http://www.springerlink.com/content/978-3-642-33518-1",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Bureddy:2012:OGM,
  author = {D. Bureddy and H. Wang and A. Venkatesh and S. Potluri
    and D. K. Panda},
  title = {{OMB-GPU}: a Micro-Benchmark Suite for Evaluating
    {MPI} Libraries on {GPU} Clusters},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {7490},
  pages = {110--120},
  year = {2012},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-642-33518-1_16},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Wed Dec 19 15:23:42 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer.com/chapter/10.1007/978-3-642-33518-1_16/},
  acknowledgement = ack-nhfb,
  book-DOI = {https://doi.org/10.1007/978-3-642-33518-1},
  book-URL = {http://www.springerlink.com/content/978-3-642-33518-1},
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Busa:2012:ACO,
  author = {J{\'a}n {Busa, Jr.} and Shura Hayryan and Ming-Chya Wu
    and J{\'a}n Busa and Chin-Kun Hu},
  title = {{ARVO-CL}: the {OpenCL} version of the {ARVO} package
    --- An efficient tool for computing the accessible
    surface area and the excluded volume of proteins via
    analytical equations},
  journal = j-COMP-PHYS-COMM,
  volume = {183},
  number = {11},
  pages = {2494--2497},
  month = nov,
  year = {2012},
  CODEN = {CPHCBZ},
  DOI = {https://doi.org/10.1016/j.cpc.2012.04.019},
  ISSN = {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L = {0010-4655},
  bibdate = {Fri Jul 27 07:00:54 MDT 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0010465512001580},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-URL = {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Bustamam:2012:FPM,
  author = {Alhadi Bustamam and Kevin Burrage and Nicholas A.
    Hamilton},
  title = {Fast Parallel {Markov} Clustering in Bioinformatics
    Using Massively Parallel Computing on {GPU} with {CUDA}
    and {ELLPACK-R} Sparse Format},
  journal = j-TCBB,
  volume = {9},
  number = {3},
  pages = {679--692},
  month = may,
  year = {2012},
  CODEN = {ITCBCY},
  DOI = {https://doi.org/10.1109/TCBB.2011.68},
  ISSN = {1545-5963 (print), 1557-9964 (electronic)},
  ISSN-L = {1545-5963},
  bibdate = {Thu Apr 19 17:58:10 MDT 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/tcbb.bib},
  abstract = {Markov clustering (MCL) is becoming a key algorithm
    within bioinformatics for determining clusters in
    networks. However, with increasing vast amount of data
    on biological networks, performance and scalability
    issues are becoming a critical limiting factor in
    applications. Meanwhile, GPU computing, which uses CUDA
    tool for implementing a massively parallel computing
    environment in the GPU card, is becoming a very
    powerful, efficient, and low-cost option to achieve
    substantial performance gains over CPU approaches. The
    use of on-chip memory on the GPU is efficiently
    lowering the latency time, thus, circumventing a major
    issue in other parallel computing environments, such as
    MPI. We introduce a very fast Markov clustering
    algorithm using CUDA (CUDA-MCL) to perform parallel
    sparse matrix-matrix computations and parallel sparse
    Markov matrix normalizations, which are at the heart of
    MCL. We utilized ELLPACK-R sparse format to allow the
    effective and fine-grain massively parallel processing
    to cope with the sparse nature of interaction networks
    data sets in bioinformatics applications. As the
    results show, CUDA-MCL is significantly faster than the
    original MCL running on CPU. Thus, large-scale parallel
    computation on off-the-shelf desktop-machines, that
    were previously only possible on supercomputing
    architectures, can significantly change the way
    bioinformaticians and biologists deal with their
    data.},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE/ACM Transactions on Computational Biology and
    Bioinformatics},
  journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J954},
}
@Article{Cabarle:2012:SNP,
  author = {Francis George C. Cabarle and Henry Adorna and Miguel
    A. Mart{\'\i}nez},
  title = {A Spiking Neural {P} System Simulator Based on
    {CUDA}},
  journal = j-LECT-NOTES-COMP-SCI,
  volume = {7184},
  pages = {87--103},
  year = {2012},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-642-28024-5_8},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  bibdate = {Wed Dec 19 15:25:48 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012b.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://link.springer.com/chapter/10.1007/978-3-642-28024-5_8/},
  acknowledgement = ack-nhfb,
  book-DOI = {https://doi.org/10.1007/978-3-642-28024-5},
  book-URL = {http://www.springerlink.com/content/978-3-642-28024-5},
  fjournal = {Lecture Notes in Computer Science},
}
@Article{Calotoiu:2012:PID,
author = "Alexandru Calotoiu and Christian Siebert and Felix
Wolf",
title = "Pattern-Independent Detection of Manual Collectives in
{MPI} Programs",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7484",
pages = "28--39",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-32820-6_5",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:23:34 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-32820-6_5/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-32820-6",
book-URL = "http://www.springerlink.com/content/978-3-642-32820-6",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Cecilia:2012:CSC,
author = "Jos{\'e} Mar{\'\i}a Cecilia and Jos{\'e} Manuel
Garc{\'\i}a and Manuel Ujald{\'o}n",
title = "{CUDA $2$D} Stencil Computations for the {Jacobi}
Method",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7133",
pages = "173--183",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-28151-8_17",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:24:46 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012b.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-28151-8_17/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-28151-8",
book-URL = "http://www.springerlink.com/content/978-3-642-28151-8",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Chen:2012:PUA,
author = "Yifeng Chen and Xiang Cui and Hong Mei",
title = "{PARRAY}: a unifying array representation for
heterogeneous parallelism",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "171--180",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145838",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "This paper introduces a programming interface called
PARRAY (or Parallelizing ARRAYs) that supports
system-level succinct programming for heterogeneous
parallel systems like GPU clusters. The current
practice of software development requires combining
several low-level libraries like Pthread, OpenMP, CUDA
and MPI. Achieving productivity and portability is hard
with different numbers and models of GPUs. PARRAY
extends mainstream C programming with novel array types
of distinct features: (1) the dimensions of an array
type are nested in a tree, conceptually reflecting the
memory hierarchy; (2) the definition of an array type
may contain references to other array types, allowing
sophisticated array types to be created for
parallelization; (3) threads also form arrays that
allow programming in a
Single-Program-Multiple-Codeblock (SPMC) style to unify
various sophisticated communication patterns. This
leads to shorter, more portable and maintainable
parallel codes, while the programmer still has control
over performance-related features necessary for deep
manual optimization. Although the source-to-source code
generator only faithfully generates low-level library
calls according to the type information, higher-level
programming and automatic performance optimization are
still possible through building libraries of
sub-programs on top of PARRAY. The case study on
cluster FFT illustrates a simple 30-line code that 2x
outperforms Intel Cluster MKL on the Tianhe-1A system
with 7168 Fermi GPUs and 14336 CPUs.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Chevitarese:2012:STN,
author = "Daniel Salles Chevitarese and Dilza Szwarcman and
Marley Vellasco",
title = "Speeding Up the Training of Neural Networks with
{CUDA} Technology",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7267",
pages = "30--38",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-29347-4_4",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:18:50 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012d.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-29347-4_4/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-29347-4",
book-URL = "http://www.springerlink.com/content/978-3-642-29347-4",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Collingbourne:2012:STO,
author = "Peter Collingbourne and Cristian Cadar and Paul H. J.
Kelly",
title = "Symbolic Testing of {OpenCL} Code",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7261",
pages = "203--218",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-34188-5_18",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:18:42 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012d.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-34188-5_18/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-34188-5",
book-URL = "http://www.springerlink.com/content/978-3-642-34188-5",
fjournal = "Lecture Notes in Computer Science",
}
@InProceedings{Cui:2012:OOB,
author = "Zheng Cui and Lei Xia and Patrick G. Bridges and Peter
A. Dinda and John R. Lange",
title = "Optimizing overlay-based virtual networking through
optimistic interrupts and cut-through forwarding",
crossref = "Hollingsworth:2012:SPI",
pages = "99:1--99:??",
year = "2012",
bibdate = "Thu Nov 15 07:38:35 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
URL = "http://conferences.computer.org/sc/2012/papers/1000a029.pdf",
abstract = "Overlay-based virtual networking provides a powerful
model for realizing virtual distributed and parallel
computing systems with strong isolation, portability,
and recoverability properties. However, in extremely
high throughput and low latency networks, such overlays
can suffer from bandwidth and latency limitations,
which is of particular concern if we want to apply the
model in HPC environments. Through careful study of an
existing very high performance overlay-based virtual
network system, we have identified two core issues
limiting performance: delayed and/or excessive virtual
interrupt delivery into guests, and copies between host
and guest data buffers done during encapsulation. We
respond with two novel optimizations: optimistic,
timer-free virtual interrupt injection, and zero-copy
cut-through data forwarding. These optimizations
improve the latency and bandwidth of the overlay
network on 10 Gbps interconnects, resulting in
near-native performance for a wide range of
microbenchmarks and MPI application benchmarks.",
acknowledgement = ack-nhfb,
articleno = "99",
}
@Article{Danalis:2012:MCT,
author = "Anthony Danalis",
title = "{MPI} and Compiler Technology: a Love-Hate
Relationship",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7490",
pages = "12--13",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-33518-1_4",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:23:42 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/accesspage/chapter/10.1007/978-3-642-33518-1_4",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-33518-1",
book-URL = "http://www.springerlink.com/content/978-3-642-33518-1",
fjournal = "Lecture Notes in Computer Science",
}
@Article{delaAsuncion:2012:MCI,
author = "Marc de la Asunci{\'o}n and Jos{\'e} M. Mantas and
Manuel J. Castro and E. D. Fern{\'a}ndez-Nieto",
title = "An {MPI-CUDA} implementation of an improved {Roe}
method for two-layer shallow water systems",
journal = j-J-PAR-DIST-COMP,
volume = "72",
number = "9",
pages = "1065--1072",
month = sep,
year = "2012",
CODEN = "JPDCER",
DOI = "https://doi.org/10.1016/j.jpdc.2011.07.012",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Fri Jul 27 06:43:44 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/07437315",
URL = "http://www.sciencedirect.com/science/article/pii/S074373151100147X",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Deshpande:2012:AGC,
author = "Vivek Deshpande and Xing Wu and Frank Mueller",
title = "Auto-generation of communication benchmark traces",
journal = j-SIGMETRICS,
volume = "40",
number = "2",
pages = "99--105",
month = sep,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2381056.2381078",
ISSN = "0163-5999 (print), 1557-9484 (electronic)",
ISSN-L = "0163-5999",
bibdate = "Fri Nov 9 11:06:40 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigmetrics.bib",
abstract = "Benchmarks are essential for evaluating HPC hardware
and software for petascale machines and beyond. But
benchmark creation is a tedious manual process. As a
result, benchmarks tend to lag behind the development
of complex scientific codes. Our work automates the
creation of communication benchmarks. Given an MPI
application, we utilize ScalaTrace, a lossless and
scalable framework to trace communication operations
and execution time while abstracting away the
computations. A single trace file that reflects the
behavior of all nodes is subsequently expanded to C
source code by a novel code generator. This resulting
benchmark code is compact, portable, human-readable,
and accurately reflects the original application's
communication characteristics and performance.
Experimental results demonstrate that generated source
code of benchmarks preserves both the communication
patterns and the run-time behavior of the original
application. Such automatically generated benchmarks
not only shorten the transition from application
development to benchmark extraction but also facilitate
code obfuscation, which is essential for benchmark
extraction from commercial and restricted
applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGMETRICS Performance Evaluation Review",
journal-URL = "http://portal.acm.org/toc.cfm?id=J618",
}
@Article{Deuzeman:2012:LMP,
author = "Albert Deuzeman and Siebren Reker and Carsten Urbach
and {ETM Collaboration}",
title = "{Lemon}: An {MPI} parallel {I/O} library for data
encapsulation using {LIME}",
journal = j-COMP-PHYS-COMM,
volume = "183",
number = "6",
pages = "1321--1335",
month = jun,
year = "2012",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/j.cpc.2012.01.016",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Wed Feb 29 07:07:40 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465512000318",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Diaz:2012:CCF,
author = "M. J. Castro D{\'\i}az and E. Fern{\'a}ndez-Nieto",
title = "A Class of Computationally Fast First Order Finite
Volume Solvers: {PVM} Methods",
journal = j-SIAM-J-SCI-COMP,
volume = "34",
number = "4",
pages = "A2173--A2196",
month = "????",
year = "2012",
CODEN = "SJOCE3",
DOI = "https://doi.org/10.1137/100795280",
ISSN = "1064-8275 (print), 1095-7197 (electronic)",
ISSN-L = "1064-8275",
bibdate = "Tue Oct 30 14:49:05 MDT 2012",
bibsource = "http://epubs.siam.org/sam-bin/dbq/toc/SISC/34/4;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/siamjscicomput.bib",
acknowledgement = ack-nhfb,
fjournal = "SIAM Journal on Scientific Computing",
journal-URL = "http://epubs.siam.org/sisc",
onlinedate = "January 2012",
}
@Article{Didelot:2012:IMC,
author = "Sylvain Didelot and Patrick Carribault and Marc
P{\'e}rache and William Jalby",
title = "Improving {MPI} Communication Overlap with
Collaborative Polling",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7490",
pages = "37--46",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-33518-1_9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:23:42 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_9/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-33518-1",
book-URL = "http://www.springerlink.com/content/978-3-642-33518-1",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Dinan:2012:EMC,
author = "James Dinan and David Goodell and William Gropp and
Rajeev Thakur and Pavan Balaji",
title = "Efficient Multithreaded Context {ID} Allocation in
{MPI}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7490",
pages = "57--66",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-33518-1_11",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:23:42 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_11/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-33518-1",
book-URL = "http://www.springerlink.com/content/978-3-642-33518-1",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Du:2012:COT,
author = "Peng Du and Rick Weber and Piotr Luszczek and
Stanimire Tomov and Gregory Peterson and Jack
Dongarra",
title = "From {CUDA} to {OpenCL}: Towards a
performance-portable solution for multi-platform {GPU}
programming",
journal = j-PARALLEL-COMPUTING,
volume = "38",
number = "8",
pages = "391--407",
month = aug,
year = "2012",
CODEN = "PACOEJ",
DOI = "https://doi.org/10.1016/j.parco.2011.10.002",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Wed Jun 20 17:04:05 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819111001335",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Dziubak:2012:OOI,
author = "Tomasz Dziubak and Jacek Matulewski",
title = "An object-oriented implementation of a solver of the
time-dependent {Schr{\"o}dinger} equation using the
{CUDA} technology",
journal = j-COMP-PHYS-COMM,
volume = "183",
number = "3",
pages = "800--812",
month = mar,
year = "2012",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/j.cpc.2011.11.026",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Sat Feb 11 10:11:02 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465511003948",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Eichenberger:2012:DOT,
author = "Alexandre E. Eichenberger and Christian Terboven and
Michael Wong and Dieter an Mey",
title = "The Design of {OpenMP} Thread Affinity",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7312",
pages = "15--28",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-30961-8_2",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:19:49 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_2/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-30961-8",
book-URL = "http://www.springerlink.com/content/978-3-642-30961-8",
fjournal = "Lecture Notes in Computer Science",
}
@Article{ElZein:2012:GOC,
author = "Ahmed H. {El Zein} and Alistair P. Rendell",
title = "Generating optimal {CUDA} sparse matrix--vector
product implementations for evolving {GPU} hardware",
journal = j-CCPE,
volume = "24",
number = "1",
pages = "3--13",
month = jan,
year = "2012",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.1732",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Mon Jan 16 12:11:17 MST 2012",
bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "17 Apr 2011",
}
@InProceedings{Fiala:2012:DCS,
author = "David Fiala and Frank Mueller and Christian Engelmann
and Rolf Riesen and Kurt Ferreira and Ron Brightwell",
title = "Detection and correction of silent data corruption for
large-scale high-performance computing",
crossref = "Hollingsworth:2012:SPI",
pages = "78:1--78:??",
year = "2012",
bibdate = "Thu Nov 15 07:38:35 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
URL = "http://conferences.computer.org/sc/2012/papers/1000a046.pdf",
abstract = "Faults have become the norm rather than the exception
for high-end computing clusters. Exacerbating this
situation, some of these faults remain undetected,
manifesting themselves as silent errors that allow
applications to compute incorrect results. This paper
studies the potential for redundancy to detect and
correct soft errors in MPI message-passing applications
while investigating the challenges inherent to
detecting soft errors within MPI applications by
providing transparent MPI redundancy. By assuming a
model wherein corruption in application data manifests
itself by producing differing MPI messages between
replicas, we study the best suited protocols for
detecting and correcting corrupted MPI messages. Using
our fault injector, we observe that even a single error
can have profound effects on applications by causing a
cascading pattern of corruption which in most cases
spreads to all other processes. Results indicate that
our consistency protocols can successfully protect
applications experiencing even high rates of silent
data corruption.",
acknowledgement = ack-nhfb,
articleno = "78",
}
@Article{Filgueira:2012:DCD,
author = "Rosa Filgueira and Jes{\'u}s Carretero and David E.
Singh and Alejandro Calder{\'o}n and Alberto
N{\'u}{\~n}ez",
title = "{Dynamic--CoMPI}: dynamic optimization techniques for
{MPI} parallel applications",
journal = j-J-SUPERCOMPUTING,
volume = "59",
number = "1",
pages = "361--391",
month = jan,
year = "2012",
CODEN = "JOSUED",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Tue Dec 13 15:25:33 MST 2011",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=59&issue=1;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=59&issue=1&spage=361",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Garcia:2012:DLB,
author = "Marta Garcia and Julita Corbalan and Rosa Maria Badia
and Jesus Labarta",
title = "A Dynamic Load Balancing Approach with
{SMPSuperscalar} and {MPI}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7174",
pages = "10--23",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-30397-5_2",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:25:38 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012b.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-30397-5_2/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-30397-5",
book-URL = "http://www.springerlink.com/content/978-3-642-30397-5",
fjournal = "Lecture Notes in Computer Science",
}
@InProceedings{Garland:2012:DUP,
author = "Michael Garland and Manjunath Kudlur and Yili Zheng",
title = "Designing a unified programming model for
heterogeneous machines",
crossref = "Hollingsworth:2012:SPI",
pages = "67:1--67:??",
year = "2012",
bibdate = "Thu Nov 15 07:38:35 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
URL = "http://conferences.computer.org/sc/2012/papers/1000a064.pdf",
abstract = "While high-efficiency machines are increasingly
embracing heterogeneous architectures and massive
multithreading, contemporary mainstream programming
languages reflect a mental model in which processing
elements are homogeneous, concurrency is limited, and
memory is a flat undifferentiated pool of storage.
Moreover, the current state of the art in programming
heterogeneous machines tends towards using separate
programming models, such as OpenMP and CUDA, for
different portions of the machine. Both of these
factors make programming emerging heterogeneous
machines unnecessarily difficult. We describe the
design of the Phalanx programming model, which seeks to
provide a unified programming model for heterogeneous
machines. It provides constructs for bulk parallelism,
synchronization, and data placement which operate
across the entire machine. Our prototype implementation
is able to launch and coordinate work on both CPU and
GPU processors within a single node, and by leveraging
the GASNet runtime, is able to run across all the nodes
of a distributed-memory machine.",
acknowledgement = ack-nhfb,
articleno = "67",
}
@Article{Ghosh:2012:RAA,
author = "Sudeep Ghosh and Jason Hiser and Jack W. Davidson",
title = "Replacement attacks against {VM}-protected
applications",
journal = j-SIGPLAN,
volume = "47",
number = "7",
pages = "203--214",
month = jul,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2365864.2151051",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Sep 6 10:01:03 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "VEE '12 conference proceedings.",
abstract = "Process-level virtualization is increasingly being
used to enhance the security of software applications
from reverse engineering and unauthorized modification
(called software protection). Process-level virtual
machines (PVMs) can safeguard the application code at
run time and hamper the adversary's ability to launch
dynamic attacks on the application. This dynamic
protection, combined with its flexibility, ease in
handling legacy systems and low performance overhead,
has made process-level virtualization a popular
approach for providing software protection. While there
has been much research on using process-level
virtualization to provide such protection, there has
been less research on attacks against PVM-protected
software. In this paper, we describe an attack on
applications protected using process-level
virtualization, called a replacement attack. In a
replacement attack, the adversary replaces the
protecting PVM with an attack VM thereby rendering the
application vulnerable to analysis and modification. We
present a general description of the replacement attack
methodology and two attack implementations against a
protected application using freely available tools. The
generality and simplicity of replacement attacks
demonstrates that there is a strong need to develop
techniques that meld applications more tightly to the
protecting PVM to prevent such attacks.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Gong:2012:OCN,
author = "Yifan Gong and Bingsheng He and Jianlong Zhong",
title = "An overview of {CMPI}: network performance aware {MPI}
in the cloud",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "297--298",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145862",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Cloud computing enables users to perform distributed
computing tasks on many virtual machines, without
owning a physical cluster. Recently, various
distributed computing tasks such as scientific
applications are being moved from supercomputers and
private clusters to public clouds. Message passing
interface (MPI) is a key and common component in
distributed computing tasks. The virtualized computing
environment of the public cloud hides the network
topology information from the users, and existing
topology-aware optimizations for MPI are no longer
feasible in the cloud environment. We propose a network
performance aware MPI library named CMPI. CMPI embraces
a new model for capturing the network performance among
different virtual machines in the cloud. Based on the
network performance model, we develop novel network
performance aware algorithms for communication
operations. This poster gives an overview of CMPI
design, and presents some preliminary results on
collective operations such as broadcast. We demonstrate
the effectiveness of our network performance aware
optimizations on Amazon EC2.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Gravvanis:2012:SFD,
author = "G. A. Gravvanis and C. K. Filelis-Papadopoulos and K.
M. Giannoutakis",
title = "Solving finite difference linear systems on {GPUs}:
{CUDA} based Parallel Explicit Preconditioned
Biconjugate Conjugate Gradient type Methods",
journal = j-J-SUPERCOMPUTING,
volume = "61",
number = "3",
pages = "590--604",
month = sep,
year = "2012",
CODEN = "JOSUED",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Fri Oct 26 07:41:53 MDT 2012",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=61&issue=3;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=61&issue=3&spage=590",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Gropp:2012:AMI,
author = "William Gropp and Ewing Lusk and Rajeev Thakur",
title = "Advanced {MPI} Including New {MPI-3} Features",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7490",
pages = "14--14",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-33518-1_5",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:23:42 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/accesspage/chapter/10.1007/978-3-642-33518-1_5",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-33518-1",
book-URL = "http://www.springerlink.com/content/978-3-642-33518-1",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Gropp:2012:MBW,
author = "William Gropp",
title = "{MPI 3} and Beyond: Why {MPI} Is Successful and What
Challenges It Faces",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7490",
pages = "1--9",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-33518-1_1",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:23:42 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_1/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-33518-1",
book-URL = "http://www.springerlink.com/content/978-3-642-33518-1",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Hermanns:2012:SDM,
author = "Marc-Andr{\'e} Hermanns and Markus Geimer and Bernd
Mohr and Felix Wolf",
title = "Scalable detection of {MPI-2} remote memory access
inefficiency patterns",
journal = j-IJHPCA,
volume = "26",
number = "3",
pages = "227--236",
month = aug,
year = "2012",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342011406758",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Thu Nov 8 11:31:14 MST 2012",
bibsource = "http://hpc.sagepub.com/content/26/3.toc;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/26/3/227.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
onlinedate = "June 8, 2011",
}
@InProceedings{Hilbrich:2012:MRE,
author = "Tobias Hilbrich and Joachim Protze and Martin Schulz
and Bronis R. de Supinski and Matthias S. M{\"u}ller",
title = "{MPI} runtime error detection with {MUST}: advances in
deadlock detection",
crossref = "Hollingsworth:2012:SPI",
pages = "30:1--30:??",
year = "2012",
bibdate = "Thu Nov 15 07:38:35 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
URL = "http://conferences.computer.org/sc/2012/papers/1000a010.pdf",
abstract = "The widely used Message Passing Interface (MPI) is
complex and rich. As a result, application developers
require automated tools to avoid and to detect MPI
programming errors. We present the Marmot Umpire
Scalable Tool (MUST) that detects such errors with
significantly increased scalability. We present
improvements to our graph-based deadlock detection
approach for MPI, which cover future MPI extensions.
Our enhancements also check complex MPI constructs that
no previous graph-based detection approach handled
correctly. Finally, we present optimizations for the
processing of MPI operations that reduce runtime
deadlock detection overheads. Existing approaches often
require $ O(p) $ analysis time per MPI operation, for $p$
processes. We empirically observe that our improvements
lead to sub-linear or better analysis time per
operation for a wide range of real world
applications.",
acknowledgement = ack-nhfb,
articleno = "30",
}
@Article{Hoefler:2012:LMO,
  author =       "Torsten Hoefler and James Dinan and Darius Buntinas
                 and Pavan Balaji and Brian W. Barrett",
  title =        "Leveraging {MPI}'s One-Sided Communication Interface
                 for Shared-Memory Programming",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7490",
  pages =        "132--141",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-33518-1_18",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:23:42 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_18/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-33518-1",
  book-URL =     "http://www.springerlink.com/content/978-3-642-33518-1",
  fjournal =     "Lecture Notes in Computer Science",
}
@InProceedings{Hoefler:2012:OPC,
  author =       "Torsten Hoefler and Timo Schneider",
  title =        "Optimization principles for collective neighborhood
                 communications",
  crossref =     "Hollingsworth:2012:SPI",
  pages =        "98:1--98:??",
  year =         "2012",
  bibdate =      "Thu Nov 15 07:38:35 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
  URL =          "http://conferences.computer.org/sc/2012/papers/1000a028.pdf",
  abstract =     "Many scientific applications operate in a
                 bulk-synchronous mode of iterative communication and
                 computation steps. Even though the communication steps
                 happen at the same logical time, important patterns
                 such as stencil computations cannot be expressed as
                 collective communications in MPI. We demonstrate how
                 neighborhood collective operations allow to specify
                 arbitrary collective communication relations during
                 run-time and enable optimizations similar to
                 traditional collective calls. We show a number of
                 optimization opportunities and algorithms for different
                 communication scenarios. We also show how users can
                 assert constraints that provide additional optimization
                 opportunities in a portable way. We demonstrate the
                 utility of all described optimizations in a highly
                 optimized implementation of neighborhood collective
                 operations. Our communication and protocol
                 optimizations result in a performance improvement of up
                 to a factor of two for small stencil communications. We
                 found that, for some patterns, our optimization
                 heuristics automatically generate communication
                 schedules that are comparable to hand-tuned
                 collectives. With those optimizations in place, we are
                 able to accelerate arbitrary collective communication
                 patterns, such as regular and irregular stencils with
                 optimization methods for collective communications. We
                 expect that our methods will influence the design of
                 future MPI libraries and provide a significant
                 performance benefit on large-scale systems.",
  acknowledgement = ack-nhfb,
  articleno =    "98",
}
@Article{Hori:2012:EKL,
  author =       "Atsushi Hori and Toyohisa Kameyama and Yuichi Tsujita
                 and Mitaro Namiki and Yutaka Ishikawa",
  title =        "An Efficient Kernel-Level Blocking {MPI}
                 Implementation",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7490",
  pages =        "153--162",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-33518-1_20",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:23:42 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_20/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-33518-1",
  book-URL =     "http://www.springerlink.com/content/978-3-642-33518-1",
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Hormati:2012:SPS,
  author =       "Amir H. Hormati and Mehrzad Samadi and Mark Woh and
                 Trevor Mudge and Scott Mahlke",
  title =        "{Sponge}: portable stream programming on graphics
                 engines",
  journal =      j-SIGPLAN,
  volume =       "47",
  number =       "4",
  pages =        "381--392",
  month =        apr,
  year =         "2012",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2248487.1950409",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Thu Jun 7 08:15:03 MDT 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Graphics processing units (GPUs) provide a low cost
                 platform for accelerating high performance
                 computations. The introduction of new programming
                 languages, such as CUDA and OpenCL, makes GPU
                 programming attractive to a wide variety of
                 programmers. However, programming GPUs is still a
                 cumbersome task for two primary reasons: tedious
                 performance optimizations and lack of portability.
                 First, optimizing an algorithm for a specific GPU is a
                 time-consuming task that requires a thorough
                 understanding of both the algorithm and the underlying
                 hardware. Unoptimized CUDA programs typically only
                 achieve a small fraction of the peak GPU performance.
                 Second, GPU code lacks efficient portability as code
                 written for one GPU can be inefficient when executed on
                 another. Moving code from one GPU to another while
                 maintaining the desired performance is a non-trivial
                 task often requiring significant modifications to
                 account for the hardware differences. In this work, we
                 propose Sponge, a compilation framework for GPUs using
                 synchronous data flow streaming languages. Sponge is
                 capable of performing a wide variety of optimizations
                 to generate efficient code for graphics engines. Sponge
                 alleviates the problems associated with current GPU
                 programming methods by providing portability across
                 different generations of GPUs and CPUs, and a better
                 abstraction of the hardware details, such as the memory
                 hierarchy and threading model. Using streaming, we
                 provide a write-once software paradigm and rely on the
                 compiler to automatically create optimized CUDA code
                 for a wide variety of GPU targets. Sponge's compiler
                 optimizations improve the performance of the baseline
                 CUDA implementations by an average of 3.2x.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ASPLOS '12 conference proceedings.",
}
@Article{Hosking:2012:CHL,
  author =       "Christophe Dubach and Perry Cheng and Rodric Rabbah
                 and David F. Bacon and Stephen J. Fink",
  title =        "Compiling a high-level language for {GPUs}: (via
                 language support for architectures and compilers)",
  journal =      j-SIGPLAN,
  volume =       "47",
  number =       "6",
  pages =        "1--12",
  month =        jun,
  year =         "2012",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2345156.2254066",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Aug 6 16:31:49 MDT 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/java2000.bib;
                 http://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PLDI '12 proceedings.",
  abstract =     "Languages such as OpenCL and CUDA offer a standard
                 interface for general-purpose programming of GPUs.
                 However, with these languages, programmers must
                 explicitly manage numerous low-level details involving
                 communication and synchronization. This burden makes
                 programming GPUs difficult and error-prone, rendering
                 these powerful devices inaccessible to most
                 programmers. We desire a higher-level programming model
                 that makes GPUs more accessible while also effectively
                 exploiting their computational power. This paper
                 presents features of Lime, a new Java-compatible
                 language targeting heterogeneous systems, that allow an
                 optimizing compiler to generate high quality GPU code.
                 The key insight is that the language type system
                 enforces isolation and immutability invariants that
                 allow the compiler to optimize for a GPU without heroic
                 compiler analysis. Our compiler attains GPU speedups
                 between 75\% and 140\% of the performance of native
                 OpenCL code.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "NOTE(review): this entry previously listed Tony
                 Hosking as sole author. The author list has been
                 changed to that of the PLDI '12 Lime paper; please
                 confirm against the DOI landing page. The citation
                 label is deliberately unchanged so that existing
                 citations still resolve; check the file for a
                 duplicate entry under the first author's name.",
}
@Article{Hursey:2012:AFA,
  author =       "Joshua Hursey and Richard L. Graham",
  title =        "Analyzing fault aware collective performance in a
                 process fault tolerant {MPI}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "38",
  number =       "1--2",
  pages =        "15--25",
  month =        jan # "\slash " # feb,
  year =         "2012",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2011.10.010",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sat Feb 4 15:17:36 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819111001414",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Book{Hwu:2012:GCG,
  editor =       "Wen-mei Hwu",
  title =        "{GPU} computing gems",
  publisher =    "Morgan Kaufmann",
  address =      "Boston, MA",
  edition =      "Jade",
  pages =        "xvi + 541 + 16",
  year =         "2012",
  ISBN =         "0-12-385963-8 (hardback)",
  ISBN-13 =      "978-0-12-385963-1 (hardback)",
  LCCN =         "T385 .G6875 2012",
  bibdate =      "Sat Feb 8 18:16:05 MST 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/elefunt.bib;
                 http://www.math.utah.edu/pub/tex/bib/matlab.bib;
                 http://www.math.utah.edu/pub/tex/bib/prng.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 z3950.loc.gov:7090/Voyager",
  series =       "Applications of GPU computing series",
  abstract =     "Since the introduction of CUDA in 2007, more than 100
                 million computers with CUDA capable GPUs have been
                 shipped to end users. GPU computing application
                 developers can now expect their application to have a
                 mass market. With the introduction of OpenCL in 2010,
                 researchers can now expect to develop GPU applications
                 that can run on hardware from multiple vendors.",
  acknowledgement = ack-nhfb,
  subject =      "Graphics processing units; Programming; Imaging
                 systems; Computer graphics; Image processing; Digital
                 techniques",
  tableofcontents = "Part 1: Parallel Algorithms and Data Structures ---
                 Paulius Micikevicius, NVIDIA \\
                 1 Large-Scale GPU Search \\
                 2 Edge v. Node Parallelism for Graph Centrality Metrics
                 \\
                 3 Optimizing parallel prefix operations for the Fermi
                 architecture \\
                 4 Building an Efficient Hash Table on the GPU \\
                 5 An Efficient CUDA Algorithm for the Maximum Network
                 Flow Problem \\
                 6 On Improved Memory Access Patterns for Cellular
                 Automata Using CUDA \\
                 7 Fast Minimum Spanning Tree Computation on Large
                 Graphs \\
                 8 Fast in-place sorting with CUDA based on bitonic sort
                 \\
                 Part 2: Numerical Algorithms --- Frank Jargstorff,
                 NVIDIA \\
                 9 Interval Arithmetic in CUDA \\
                 10 Approximating the erfinv Function \\
                 11 A Hybrid Method for Solving Tridiagonal Systems on
                 the GPU \\
                 12 LU Decomposition in CULA \\
                 13 GPU Accelerated Derivative-free Optimization \\
                 Part 3: Engineering Simulation --- Peng Wang, NVIDIA
                 \\
                 14 Large-scale gas turbine simulations on GPU clusters
                 \\
                 15 GPU acceleration of rarefied gas dynamic simulations
                 \\
                 16 Assembly of Finite Element Methods on Graphics
                 Processors \\
                 17 CUDA implementation of Vertex-Centered, Finite
                 Volume CFD methods on Unstructured Grids with Flow
                 Control Applications \\
                 18 Solving Wave Equations on Unstructured Geometries
                 \\
                 19 Fast electromagnetic integral equation solvers on
                 graphics processing units (GPUs) \\
                 Part 4: Interactive Physics for Games and Engineering
                 Simulation --- Richard Tonge, NVIDIA \\
                 20 Solving Large Multi-Body Dynamics Problems on the
                 GPU \\
                 21 Implicit FEM Solver in CUDA \\
                 22 Real-time Adaptive GPU multi-agent path planning \\
                 Part 5: Computational Finance --- Thomas Bradley,
                 NVIDIA \\
                 23 High performance finite difference PDE solvers on
                 GPUs for financial option pricing \\
                 24 Identifying and Mitigating Credit Risk using
                 Large-scale Economic Capital Simulations \\
                 25 Financial Market Value-at-Risk Estimation using the
                 Monte Carlo Method \\
                 Part 6: Programming Tools and Techniques --- Cliff
                 Woolley, NVIDIA \\
                 26 Thrust: A Productivity-Oriented Library for CUDA \\
                 27 GPU Scripting and Code Generation with PyCUDA \\
                 28 Jacket: GPU Powered MATLAB Acceleration \\
                 29 Accelerating Development and Execution Speed with
                 Just In Time GPU Code Generation \\
                 30 GPU Application Development, Debugging, and
                 Performance Tuning with GPU Ocelot \\
                 31 Abstraction for AoS and SoA Layout in C++ \\
                 32 Processing Device Arrays with C++ Metaprogramming
                 \\
                 33 GPU Metaprogramming: A Case Study in
                 Biologically-Inspired Machine Vision \\
                 34 A Hybridization Methodology for High-Performance
                 Linear Algebra Software for GPUs \\
                 35 Dynamic Load Balancing using Work-Stealing \\
                 36 Applying software-managed caching and CPU/GPU task
                 scheduling for accelerating dynamic workloads",
}
@Article{Jiang:2012:OSP,
  author =       "Lei Jiang and Pragneshkumar B. Patel and George
                 Ostrouchov and Ferdinand Jamitzky",
  title =        "{OpenMP}-style parallelism in data-centered multicore
                 computing with {R}",
  journal =      j-SIGPLAN,
  volume =       "47",
  number =       "8",
  pages =        "335--336",
  month =        aug,
  year =         "2012",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2370036.2145882",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Sep 12 12:11:57 MDT 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPOPP '12 conference proceedings.",
  abstract =     "R is a domain specific language widely used for
                 data analysis by the statistics community as well as by
                 researchers in finance, biology, social sciences, and
                 many other disciplines. As R programs are linked to
                 input data, the exponential growth of available data
                 makes high-performance computing with R imperative. To
                 ease the process of writing parallel programs in R,
                 code transformation from a sequential program to a
                 parallel version would bring much convenience to R
                 users. In this paper, we present our work in
                 semi-automatic parallelization of R codes with
                 user-added OpenMP-style pragmas. While such pragmas are
                 used at the frontend, we take advantage of multiple
                 parallel backends with different R packages. We provide
                 flexibility for importing parallelism with plug-in
                 components, impose built-in MapReduce for data
                 processing, and also maintain code reusability. We
                 illustrate the advantage of the on-the-fly mechanisms
                 which can lead to significant applications in
                 data-centered parallel computing.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Johnson:2012:FOL,
  author =       "Tim Johnson and Pierre Fite-Georgel and Rahul Raguram
                 and Jan-Michael Frahm",
  title =        "Fast Organization of Large Photo Collections Using
                 {CUDA}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "6554",
  pages =        "463--476",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-35740-4_36",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Mon Dec 24 08:20:14 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncs2012a.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/content/pdf/10.1007/978-3-642-35740-4_36",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-35740-4",
  book-URL =     "http://www.springerlink.com/content/978-3-642-35740-4",
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Kakimoto:2012:PCG,
  author =       "Takeshi Kakimoto and Keisuke Dohi and Yuichiro Shibata
                 and Kiyoshi Oguri",
  title =        "Performance comparison of {GPU} programming frameworks
                 with the striped {Smith--Waterman} algorithm",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "40",
  number =       "5",
  pages =        "70--75",
  month =        dec,
  year =         "2012",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2460216.2460229",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Sun May 5 09:49:56 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  note =         "HEART '12 conference proceedings.",
  abstract =     "This paper evaluates and discusses how different GPU
                 programming frameworks affect the performance obtained
                 from GPU acceleration of the striped Smith--Waterman
                 algorithm used for biological sequence alignment. A
                 total of 6 GPU implementations of the algorithm on
                 NVIDIA GT200b and AMD RV870 using the CUDA and the
                 OpenCL frameworks are compared to analyze cons and pros
                 of explicit descriptions for architecture specific
                 hardware mechanisms in the code. The evaluation results
                 show that the primitive descriptions with the CUDA are
                 still efficient especially for small size data, while
                 better instruction scheduling and optimizations are
                 carried out by the OpenCL compiler. On the other hand,
                 the combination of OpenCL and RV870 which provides a
                 relatively simple view of the architecture is efficient
                 for the large data size.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
}
@Article{Kanal:2012:MMC,
  author =       "M. E. Kanal and M. Demiralp",
  title =        "A modified method of calculating {High Dimensional
                 Model Representation (HDMR) Terms} for parallelization
                 with {MPI} and {CUDA}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "62",
  number =       "1",
  pages =        "199--213",
  month =        oct,
  year =         "2012",
  CODEN =        "JOSUED",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Fri Oct 26 07:42:33 MDT 2012",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=62&issue=1;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=62&issue=1&spage=199",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Kanal:2012:PAI,
  author =       "M. E. Kanal",
  title =        "Parallel algorithm on inversion for adjacent
                 pentadiagonal matrices with {MPI}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "59",
  number =       "2",
  pages =        "1071--1078",
  month =        feb,
  year =         "2012",
  CODEN =        "JOSUED",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Fri Apr 6 17:44:43 MDT 2012",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=59&issue=2;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=59&issue=2&spage=1071",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Karrenberg:2012:IPO,
  author =       "Ralf Karrenberg and Sebastian Hack",
  title =        "Improving Performance of {OpenCL} on {CPUs}",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7210",
  pages =        "1--20",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-28652-0_1",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:26:22 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncs2012c.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-28652-0_1/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-28652-0",
  book-URL =     "http://www.springerlink.com/content/978-3-642-28652-0",
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Kim:2012:OUP,
  author =       "Jungwon Kim and Sangmin Seo and Jun Lee and Jeongho
                 Nah and Gangwon Jo and Jaejin Lee",
  title =        "{OpenCL} as a unified programming model for
                 heterogeneous {CPU\slash GPU} clusters",
  journal =      j-SIGPLAN,
  volume =       "47",
  number =       "8",
  pages =        "299--300",
  month =        aug,
  year =         "2012",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2370036.2145863",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Sep 12 12:11:57 MDT 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPOPP '12 conference proceedings.",
  abstract =     "In this paper, we propose an OpenCL framework for
                 heterogeneous CPU/GPU clusters, and show that the
                 framework achieves both high performance and ease of
                 programming. The framework provides an illusion of a
                 single system for the user. It allows the application
                 to utilize multiple heterogeneous compute devices, such
                 as multicore CPUs and GPUs, in a remote node as if they
                 were in a local node. No communication API, such as the
                 MPI library, is required in the application source. We
                 implement the OpenCL framework and evaluate its
                 performance on a heterogeneous CPU/GPU cluster that
                 consists of one host node and nine compute nodes using
                 eleven OpenCL benchmark applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Kjolstad:2012:ADG,
  author =       "Fredrik Kjolstad and Torsten Hoefler and Marc Snir",
  title =        "Automatic datatype generation and optimization",
  journal =      j-SIGPLAN,
  volume =       "47",
  number =       "8",
  pages =        "327--328",
  month =        aug,
  year =         "2012",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2370036.2145878",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Sep 12 12:11:57 MDT 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPOPP '12 conference proceedings.",
  abstract =     "Many high performance applications spend considerable
                 time packing noncontiguous data into contiguous
                 communication buffers. MPI Datatypes provide an
                 alternative by describing noncontiguous data layouts.
                 This allows sophisticated hardware to retrieve data
                 directly from application data structures. However,
                 packing codes in real-world applications are often
                 complex and specifying equivalent datatypes is
                 difficult, time-consuming, and error prone. We present
                 an algorithm that automates the transformation. We have
                 implemented the algorithm in a tool that transforms
                 packing code to MPI Datatypes, and evaluated it by
                 transforming 90 packing codes from the NAS Parallel
                 Benchmarks. The transformation allows easy porting of
                 applications to new machines that benefit from
                 datatypes, thus improving programmer productivity.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Klemm:2012:EOV,
  author =       "Michael Klemm and Alejandro Duran and Xinmin Tian and
                 Hideki Saito and Diego Caballero",
  title =        "Extending {OpenMP*} with Vector Constructs for Modern
                 Multicore {SIMD} Architectures",
  journal =      j-LECT-NOTES-COMP-SCI,
  volume =       "7312",
  pages =        "59--72",
  year =         "2012",
  CODEN =        "LNCSD9",
  DOI =          "https://doi.org/10.1007/978-3-642-30961-8_5",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L =       "0302-9743",
  bibdate =      "Wed Dec 19 15:19:49 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_5/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL =     "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal =     "Lecture Notes in Computer Science",
}
@Article{Klockner:2012:PPS,
  author =       "Andreas Kl{\"o}ckner and Nicolas Pinto and Yunsup Lee
                 and Bryan Catanzaro and Paul Ivanov and Ahmed Fasih",
  title =        "{PyCUDA} and {PyOpenCL}: a scripting-based approach to
                 {GPU} run-time code generation",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "38",
  number =       "3",
  pages =        "157--174",
  month =        mar,
  year =         "2012",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2011.09.001",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sat Feb 4 15:17:36 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819111001281",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Kwon:2012:HAO,
  author =       "Okwan Kwon and Fahed Jubair and Rudolf Eigenmann and
                 Samuel Midkiff",
  title =        "A hybrid approach of {OpenMP} for clusters",
  journal =      j-SIGPLAN,
  volume =       "47",
  number =       "8",
  pages =        "75--84",
  month =        aug,
  year =         "2012",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2370036.2145827",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Sep 12 12:11:57 MDT 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPOPP '12 conference proceedings.",
  abstract =     "We present the first fully automated compiler-runtime
                 system that successfully translates and executes OpenMP
                 shared-address-space programs on laboratory-size
                 clusters, for the complete set of regular, repetitive
                 applications in the NAS Parallel Benchmarks. We
                 introduce a hybrid compiler-runtime translation scheme.
                 Compared to previous work, this scheme features a new
                 runtime data flow analysis and new compiler techniques
                 for improving data affinity and reducing communication
                 costs. We present and discuss the performance of our
                 translated programs, and compare them with the
                 performance of the MPI, HPF and UPC versions of the
                 benchmarks. The results show that our translated
                 programs achieve 75\% of the hand-coded MPI programs,
                 on average.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Lashuk:2012:MPA,
  author =       "Ilya Lashuk and Aparna Chandramowlishwaran and Harper
                 Langston and Tuan-Anh Nguyen and Rahul Sampath and
                 Aashay Shringarpure and Richard Vuduc and Lexing Ying
                 and Denis Zorin and George Biros",
  title =        "A massively parallel adaptive fast multipole method on
                 heterogeneous architectures",
  journal =      j-CACM,
  volume =       "55",
  number =       "5",
  pages =        "101--109",
  month =        may,
  year =         "2012",
  CODEN =        "CACMA2",
  DOI =          "https://doi.org/10.1145/2160718.2160740",
  ISSN =         "0001-0782 (print), 1557-7317 (electronic)",
  ISSN-L =       "0001-0782",
  bibdate =      "Wed May 9 07:19:14 MDT 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/cacm/;
                 http://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib;
                 http://www.math.utah.edu/pub/tex/bib/cacm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "We describe a parallel fast multipole method (FMM) for
                 highly nonuniform distributions of particles. We employ
                 both distributed memory parallelism (via MPI) and
                 shared memory parallelism (via OpenMP and GPU
                 acceleration) to rapidly evaluate two-body
                 nonoscillatory potentials in three dimensions on
                 heterogeneous high performance computing architectures.
                 We have performed scalability tests with up to 30
                 billion particles on 196,608 cores on the
                 AMD/CRAY-based Jaguar system at ORNL. On a GPU-enabled
                 system (NSF's Keeneland at Georgia Tech/ORNL), we
                 observed 30$ \times $ speedup over a single core CPU
                 and 7$ \times $ speedup over a multicore CPU
                 implementation. By combining GPUs with MPI, we achieve
                 less than 10 ns/particle and six digits of accuracy for
                 a run with 48 million nonuniformly distributed
                 particles on 192 GPUs.",
  acknowledgement = ack-nhfb,
  fjournal =     "Communications of the ACM",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J79",
}
@InProceedings{Lee:2012:EED,
  author =       "Seyong Lee and Jeffrey S. Vetter",
  title =        "Early evaluation of directive-based {GPU} programming
                 models for productive exascale computing",
  crossref =     "Hollingsworth:2012:SPI",
  pages =        "23:1--23:??",
  year =         "2012",
  bibdate =      "Thu Nov 15 07:38:35 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
  URL =          "http://conferences.computer.org/sc/2012/papers/1000a051.pdf",
  abstract =     "Graphics Processing Unit (GPU)-based parallel computer
                 architectures have shown increased popularity as a
                 building block for high performance computing, and
                 possibly for future Exascale computing. However, their
                 programming complexity remains as a major hurdle for
                 their widespread adoption. To provide better
                 abstractions for programming GPU architectures,
                 researchers and vendors have proposed several
                 directive-based GPU programming models. These
                 directive-based models provide different levels of
                 abstraction, and required different levels of
                 programming effort to port and optimize applications.
                 Understanding these differences among these new models
                 provides valuable insights on their applicability and
                 performance potential. In this paper, we evaluate
                 existing directive-based models by porting thirteen
                 application kernels from various scientific domains to
                 use CUDA GPUs, which, in turn, allows us to identify
                 important issues in the functionality, scalability,
                 tunability, and debuggability of the existing models.
                 Our evaluation shows that directive-based models can
                 achieve reasonable performance, compared to
                 hand-written GPU codes.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
}
@InProceedings{Lee:2012:SMO,
  author =       "Jaejin Lee",
  editor =       "????",
  booktitle =    "{ATIP '12: Proceedings of the ATIP\slash A*CRC
                 Workshop on Accelerator Technologies for
                 High-Performance Computing: Does Asia Lead the Way?}",
  title =        "{SnuCL} and an {MPI $+$ OpenCL} implementation of
                 {HPL} on heterogeneous {CPU\slash GPU} clusters",
  publisher =    pub-ACM,
  address =      pub-ACM:adr,
  pages =        "??--??",
  year =         "2012",
  ISBN =         "1-4503-1644-1",
  ISBN-13 =      "978-1-4503-1644-6",
  LCCN =         "????",
  bibdate =      "Wed Nov 14 11:00:18 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  remark =       "Publisher has only PDF of 28 lecture slides",
}
@InProceedings{Levesque:2012:HEA,
  author =       "John M. Levesque and Ramanan Sankaran and Ray Grout",
  title =        "Hybridizing {S3D} into an exascale application using
                 {OpenACC}: an approach for moving to multi-petaflops
                 and beyond",
  crossref =     "Hollingsworth:2012:SPI",
  pages =        "15:1--15:??",
  year =         "2012",
  bibdate =      "Thu Nov 15 07:38:35 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
  URL =          "http://conferences.computer.org/sc/2012/papers/1000a040.pdf",
  abstract =     "Hybridization is the process of converting an
                 application with a single level of parallelism to an
                 application with multiple levels of parallelism. Over
                 the past 15 years a majority of the applications that
                 run on High Performance Computing systems have employed
                 MPI for all of the parallelism within the application.
                 In the Peta-Exascale computing regime, effective
                 utilization of the hardware requires multiple levels of
                 parallelism matched to the macro architecture of the
                 system to achieve good performance. A hybridized code
                 base is performance portable when sufficient
                 parallelism is expressed in an architecture agnostic
                 form to achieve good performance on a range of
                 available systems. The hybridized S3D code is
                 performance portable across today's leading many core
                 and GPU accelerated systems. The OpenACC framework
                 allows a unified code base to be deployed for either
                 (Manycore CPU or Manycore CPU+GPU) while permitting
                 architecture specific optimizations to expose new
                 dimensions of parallelism to be utilized.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
}
@InProceedings{Li:2012:PFA,
author = "Peng Li and Guodong Li and Ganesh Gopalakrishnan",
title = "Parametric flows: automated behavior equivalencing for
symbolic analysis of races in {CUDA} programs",
crossref = "Hollingsworth:2012:SPI",
pages = "29:1--29:??",
year = "2012",
bibdate = "Thu Nov 15 07:38:35 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
URL = "http://conferences.computer.org/sc/2012/papers/1000a009.pdf",
abstract = "The growing scale of concurrency requires automated
abstraction techniques to cut down the effort in
concurrent system analysis. In this paper, we show that
the high degree of behavioral symmetry present in GPU
programs allows CUDA race detection to be dramatically
simplified through abstraction. Our abstraction
technique is one of automatically creating parametric
flows --- control-flow equivalence classes of threads
that diverge in the same manner --- and checking for data
races only across a pair of threads per parametric
flow. We have implemented this approach as an extension
of our recently proposed GKLEE symbolic analysis
framework and show that all our previous results are
dramatically improved in that (i) the parametric
flow-based analysis takes far less time, and (ii)
because of the much higher scalability of the analysis,
we can detect even more data race situations that were
previously missed by GKLEE because it was forced to
downscale examples to limit analysis complexity.
Moreover, the parametric flow-based analysis is
applicable to other programs with SPMD models.",
acknowledgement = ack-nhfb,
articleno = "29",
}
@Article{Lima:2012:PEO,
author = "Antonio M. Lima and Marco A. S. Netto and Thais Webber
and Ricardo M. Czekster and Cesar A. F. {De Rose} and
Paulo Fernandes",
title = "Performance evaluation of {OpenMP}-based algorithms
for handling {Kronecker} descriptors",
journal = j-J-PAR-DIST-COMP,
volume = "72",
number = "5",
pages = "678--692",
month = may,
year = "2012",
CODEN = "JPDCER",
DOI = "https://doi.org/10.1016/j.jpdc.2012.02.001",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Wed Mar 28 08:37:48 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/07437315",
URL = "http://www.sciencedirect.com/science/article/pii/S0743731512000354",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Ling:2012:HPP,
author = "Cheng Ling and Khaled Benkrid and Tsuyoshi Hamada",
title = "High performance phylogenetic analysis on
{CUDA}-compatible {GPUs}",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "52--57",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460226",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "The operation of phylogenetic analysis aims to
investigate the evolution and relationships among
species. It is widely used in the fields of system
biology and comparative genomics. However, phylogenetic
analysis is also a computationally intensive operation
as the number of tree topologies grows in a factorial way
with the number of species involved. Therefore, due to
the large number of species in the real world, the
computational burden has largely thwarted phylogenetic
reconstruction. In this paper, we describe the detailed
GPU-based multi-threaded design and implementation of a
Markov Chain Monte Carlo (MCMC) maximum likelihood
algorithm for phylogenetic analysis on a set of aligned
nucleotide sequences. The implementation is based on
the framework of the most widely used phylogenetic
analysis tool, namely MrBayes. The proposed approach
resulted in 6x-8x speed-up on an NVidia Geforce 460 GTX
GPU compared to an optimized GPP-based software
implementation running on a desktop computer with a
single Intel Xeon 2.53 GHz CPU and 6.0 GB RAM.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
}
@Article{Maheo:2012:AOL,
author = "Aur{\`e}le Mah{\'e}o and Souad Kolia{\"\i} and Patrick
Carribault and Marc P{\'e}rache and William Jalby",
title = "Adaptive {OpenMP} for Large {NUMA} Nodes",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7312",
pages = "254--257",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-30961-8_20",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:19:49 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_20/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-30961-8",
book-URL = "http://www.springerlink.com/content/978-3-642-30961-8",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Mainland:2012:EHM,
author = "Geoffrey Mainland",
title = "Explicitly heterogeneous metaprogramming with
{MetaHaskell}",
journal = j-SIGPLAN,
volume = "47",
number = "9",
pages = "311--322",
month = sep,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2398856.2364572",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Nov 15 16:40:19 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Languages with support for metaprogramming, like
MetaOCaml, offer a principled approach to code
generation by guaranteeing that well-typed metaprograms
produce well-typed programs. However, many problem
domains where metaprogramming can fruitfully be applied
require generating code in languages like C, CUDA, or
assembly. Rather than resorting to ad hoc code
generation techniques, these applications should be
directly supported by explicitly heterogeneous
metaprogramming languages. We present MetaHaskell, an
extension of Haskell 98 that provides modular syntactic
and type system support for type safe metaprogramming
with multiple object languages. Adding a new object
language to MetaHaskell requires only minor
modifications to the host language to support
type-level quantification over object language types
and propagation of type equality constraints. We
demonstrate the flexibility of our approach through
three object languages: a core ML language, a linear
variant of the core ML language, and a subset of C. All
three languages support metaprogramming with open terms
and guarantee that well-typed MetaHaskell programs will
only produce closed object terms that are well-typed.
The essence of MetaHaskell is captured in a type system
for a simplified metalanguage. MetaHaskell, as well as
all three object languages, are fully implemented in
the mhc bytecode compiler.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "ICFP '12 conference proceedings.",
}
@Article{Malits:2012:ELG,
author = "Roman Malits and Evgeny Bolotin and Avinoam Kolodny
and Avi Mendelson",
title = "Exploring the limits of {GPGPU} scheduling in control
flow bound applications",
journal = j-TACO,
volume = "8",
number = "4",
pages = "29:1--29:??",
month = jan,
year = "2012",
CODEN = "????",
DOI = "https://doi.org/10.1145/2086696.2086708",
ISSN = "1544-3566 (print), 1544-3973 (electronic)",
ISSN-L = "1544-3566",
bibdate = "Sat Jan 21 07:49:49 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/taco.bib",
abstract = "GPGPUs are optimized for graphics, for that reason the
hardware is optimized for massively data parallel
applications characterized by predictable memory access
patterns and little control flow. For such
applications, e.g., matrix multiplication, GPGPU based
system can achieve very high performance. However, many
general purpose data parallel applications are
characterized as having intensive control flow and
unpredictable memory access patterns. Optimizing the
code in such problems for current hardware is often
ineffective and even impractical since it exhibits low
hardware utilization leading to relatively low
performance. This work tracks the root causes of
execution inefficacies when running control flow
intensive CUDA applications on NVIDIA GPGPU hardware.",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Architecture and Code Optimization
(TACO)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Marongiu:2012:OCE,
author = "Andrea Marongiu and Luca Benini",
title = "An {OpenMP} Compiler for Efficient Use of Distributed
Scratchpad Memory in {MPSoCs}",
journal = j-IEEE-TRANS-COMPUT,
volume = "61",
number = "2",
pages = "222--236",
month = feb,
year = "2012",
CODEN = "ITCOB4",
DOI = "https://doi.org/10.1109/TC.2010.199",
ISSN = "0018-9340 (print), 1557-9956 (electronic)",
ISSN-L = "0018-9340",
bibdate = "Fri Jan 13 17:55:10 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Computers",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Martins:2012:PDC,
author = "Wellington S. Martins and Thiago F. Rangel",
title = "Phylogenetic Distance Computation Using {CUDA}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7409",
pages = "168--178",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-31927-3_15",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:21:56 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012g.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-31927-3_15/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-31927-3",
book-URL = "http://www.springerlink.com/content/978-3-642-31927-3",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Massetto:2012:NSB,
author = "Francisco Isidro Massetto and Liria Matsumoto Sato and
Kuan-Ching Li",
title = "A novel strategy for building interoperable {MPI}
environment in heterogeneous high performance systems",
journal = j-J-SUPERCOMPUTING,
volume = "60",
number = "1",
pages = "87--116",
month = apr,
year = "2012",
CODEN = "JOSUED",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Fri Apr 6 17:45:24 MDT 2012",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=60&issue=1;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=60&issue=1&spage=87",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Mehta:2012:SPE,
author = "Kshitij Mehta and Edgar Gabriel and Barbara Chapman",
title = "Specification and Performance Evaluation of Parallel
{I/O} Interfaces for {OpenMP}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7312",
pages = "1--14",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-30961-8_1",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:19:49 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_1/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-30961-8",
book-URL = "http://www.springerlink.com/content/978-3-642-30961-8",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Mittal:2012:CAS,
author = "Anshul Mittal and Nikhil Jain and Thomas George and
Yogish Sabharwal and Sameer Kumar",
title = "Collective algorithms for sub-communicators",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "315--316",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145872",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "Collective communication over a group of processors is
an integral and time consuming component in many HPC
applications. Many modern day supercomputers are based
on torus interconnects. On such systems, for an
irregular communicator comprising of a subset of
processors, the algorithms developed so far are not
contention free in general and hence non-optimal. In
this paper, we present a novel contention-free
algorithm to perform collective operations over a
subset of processors in a torus network. We also extend
previous work on regular communicators to handle
special cases of irregular communicators that occur
frequently in parallel scientific applications. For the
generic case where multiple node disjoint
sub-communicators communicate simultaneously in a
loosely synchronous fashion, we propose a novel
cooperative approach to route the data for individual
sub-communicators without contention. Empirical results
demonstrate that our algorithms outperform the
optimized MPI collective implementation on IBM's Blue
Gene/P supercomputer for large data sizes and random
node distributions.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Muller:2012:SOA,
author = "Matthias S. M{\"u}ller and John Baron and William C.
Brantley and Huiyu Feng and Daniel Hackenberg",
title = "{SPEC OMP2012} --- An Application Benchmark Suite for
Parallel Systems Using {OpenMP}",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7312",
pages = "223--236",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-30961-8_17",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:19:49 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_17/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-30961-8",
book-URL = "http://www.springerlink.com/content/978-3-642-30961-8",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Neuberger:2012:MIS,
author = "John M. Neuberger and N{\'a}ndor Sieben and James W.
Swift",
title = "An {MPI} Implementation of a Self-Submitting Parallel
Job Queue",
journal = j-INT-J-PARALLEL-PROG,
volume = "40",
number = "4",
pages = "443--464",
month = aug,
year = "2012",
CODEN = "IJPPE5",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Fri Oct 26 07:12:55 MDT 2012",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=40&issue=4;
http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.springerlink.com/openurl.asp?genre=article&issn=0885-7458&volume=40&issue=4&spage=443",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Ng:2012:STT,
author = "Nicholas Ng and Nobuko Yoshida and Xin Yu Niu and Kuen
Hung Tsoi",
title = "Session types: towards safe and fast reconfigurable
programming",
journal = j-COMP-ARCH-NEWS,
volume = "40",
number = "5",
pages = "22--27",
month = dec,
year = "2012",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2460216.2460221",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sun May 5 09:49:56 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "HEART '12 conference proceedings.",
abstract = "This paper introduces a new programming framework
based on the theory of session types for safe,
reconfigurable parallel designs. We apply the session
type theory to C and Java programming languages and
demonstrate that the session-based languages can offer
a clear and tractable framework to describe
communications between parallel components and
guarantee communication-safety and deadlock-freedom by
compile-time type checking. Many representative
communication topologies such as a ring or
scatter-gather can be programmed and verified in
session-based programming languages. Case studies
involving N-body simulation and Kmeans clustering are
used to illustrate the session-based programming style
and to demonstrate that the session-based languages
perform competitively against MPI counterparts in an
FPGA-based heterogeneous cluster, as well as the
potential of integrating them with FPGA acceleration.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
}
@InProceedings{Nguyen:2012:BTM,
author = "Tan Nguyen and Pietro Cicotti and Eric Bylaska and Dan
Quinlan and Scott B. Baden",
title = "{Bamboo}: translating {MPI} applications to a
latency-tolerant, data-driven form",
crossref = "Hollingsworth:2012:SPI",
pages = "39:1--39:??",
year = "2012",
bibdate = "Thu Nov 15 07:38:35 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
URL = "http://conferences.computer.org/sc/2012/papers/1000a032.pdf",
abstract = "We present Bamboo, a custom source-to-source
translator that transforms MPI C source into a
data-driven form that automatically overlaps
communication with available computation. Running on up
to 98304 processors of NERSC's Hopper system, we
observe that Bamboo's overlap capability speeds up MPI
implementations of a 3D Jacobi iterative solver and
Cannon's matrix multiplication. Bamboo's generated code
meets or exceeds the performance of hand optimized MPI,
which includes split-phase coding, the method
classically employed to hide communication. We achieved
our results with only modest amounts of programmer
annotation and no intrusive reprogramming of the
original application source.",
acknowledgement = ack-nhfb,
articleno = "39",
}
@Article{Nguyen:2012:SCS,
author = "Donald Nguyen and Keshav Pingali",
title = "Synthesizing concurrent schedulers for irregular
algorithms",
journal = j-SIGPLAN,
volume = "47",
number = "4",
pages = "333--344",
month = apr,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2248487.1950404",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Thu Jun 7 08:15:03 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Scheduling is the assignment of tasks or activities to
processors for execution, and it is an important
concern in parallel programming. Most prior work on
scheduling has focused either on static scheduling of
applications in which the dependence graph is known at
compile-time or on dynamic scheduling of independent
loop iterations such as in OpenMP. In irregular
algorithms, dependences between activities are complex
functions of runtime values so these algorithms are not
amenable to compile-time analysis nor do they consist
of independent activities. Moreover, the amount of work
can vary dramatically with the scheduling policy. To
handle these complexities, implementations of irregular
algorithms employ carefully handcrafted,
algorithm-specific schedulers but these schedulers are
themselves parallel programs, complicating the parallel
programming problem further. In this paper, we present
a flexible and efficient approach for specifying and
synthesizing scheduling policies for irregular
algorithms. We develop a simple compositional
specification language and show how it can concisely
encode scheduling policies in the literature. Then, we
show how to synthesize efficient parallel schedulers
from these specifications. We evaluate our approach for
five irregular algorithms on three multicore
architectures and show that (1) the performance of some
algorithms can improve by orders of magnitude with the
right scheduling policy, and (2) for the same policy,
the overheads of our synthesized schedulers are
comparable to those of fixed-function schedulers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "ASPLOS '12 conference proceedings.",
}
@Article{Nobari:2012:SPM,
author = "Sadegh Nobari and Thanh-Tung Cao and Panagiotis Karras
and St{\'e}phane Bressan",
title = "Scalable parallel minimum spanning forest
computation",
journal = j-SIGPLAN,
volume = "47",
number = "8",
pages = "205--214",
month = aug,
year = "2012",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2370036.2145842",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Sep 12 12:11:57 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPOPP '12 conference proceedings.",
abstract = "The proliferation of data in graph form calls for the
development of scalable graph algorithms that exploit
parallel processing environments. One such problem is
the computation of a graph's minimum spanning forest
(MSF). Past research has proposed several parallel
algorithms for this problem, yet none of them scales to
large, high-density graphs. In this paper we propose a
novel, scalable, parallel MSF algorithm for undirected
weighted graphs. Our algorithm leverages Prim's
algorithm in a parallel fashion, concurrently expanding
several subsets of the computed MSF. Our effort focuses
on minimizing the communication among different
processors without constraining the local growth of a
processor's computed subtree. In effect, we achieve a
scalability that previous approaches lacked. We
implement our algorithm in CUDA, running on a GPU and
study its performance using real and synthetic, sparse
as well as dense, structured and unstructured graph
data. Our experimental study demonstrates that our
algorithm outperforms the previous state-of-the-art
GPU-based MSF algorithm, while being several orders of
magnitude faster than sequential CPU-based
algorithms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Notz:2012:GBS,
author = "Patrick K. Notz and Roger P. Pawlowski and James C.
Sutherland",
title = "Graph-Based Software Design for Managing Complexity
and Enabling Concurrency in Multiphysics {PDE}
Software",
journal = j-TOMS,
volume = "39",
number = "1",
pages = "1:1--1:21",
month = nov,
year = "2012",
CODEN = "ACMSCU",
DOI = "https://doi.org/10.1145/2382585.2382586",
ISSN = "0098-3500 (print), 1557-7295 (electronic)",
ISSN-L = "0098-3500",
bibdate = "Thu Dec 6 07:36:30 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/toms.bib",
abstract = "Multiphysics simulation software is plagued by
complexity stemming from nonlinearly coupled systems of
Partial Differential Equations (PDEs). Such software
typically supports many models, which may require
different transport equations, constitutive laws, and
equations of state. Strong coupling and a multiplicity
of models leads to complex algorithms (i.e., the
properly ordered sequence of steps to assemble a
discretized set of coupled PDEs) and rigid software.
This work presents a design strategy that shifts focus
away from high-level algorithmic concerns to low-level
data dependencies. Mathematical expressions are
represented as software objects that directly expose
data dependencies. The entire system of expressions
forms a directed acyclic graph and the high-level
assembly algorithm is generated automatically through
standard graph algorithms. This approach makes problems
with complex dependencies entirely tractable, and
removes virtually all logic from the algorithm itself.
Changes are highly localized, allowing developers to
implement models without detailed understanding of any
algorithms (i.e., the overall assembly process).
Furthermore, this approach complements existing
MPI-based frameworks and can be implemented within them
easily. Finally, this approach enables algorithmic
parallelization via threads. By exposing dependencies
in the algorithm explicitly, thread-based parallelism
is implemented through algorithm decomposition,
providing a basis for exploiting parallelism
independent from domain decomposition approaches.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Mathematical Software (TOMS)",
journal-URL = "http://dl.acm.org/pub.cfm?id=J782",
}
@InProceedings{Nukada:2012:SMG,
author = "Akira Nukada and Kento Sato and Satoshi Matsuoka",
title = "Scalable multi-{GPU} {$3$-D} {FFT} for {TSUBAME 2.0}
supercomputer",
crossref = "Hollingsworth:2012:SPI",
pages = "44:1--44:??",
year = "2012",
bibdate = "Thu Nov 15 07:38:35 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
URL = "http://conferences.computer.org/sc/2012/papers/1000a044.pdf",
abstract = "For scalable 3-D FFT computation using multiple GPUs,
efficient all-to-all communication between GPUs is the
most important factor in good performance.
Implementations with point-to-point MPI library
functions and CUDA memory copy APIs typically exhibit
very large overheads especially for small message sizes
in all-to-all communications between many nodes. We
propose several schemes to minimize the overheads,
including employment of lower-level API of InfiniBand
to effectively overlap intra- and inter-node
communication, as well as auto-tuning strategies to
control scheduling and determine rail assignments. As a
result we achieve very good strong scalability as well
as good performance, up to 4.8 TFLOPS using 256 nodes of
TSUBAME 2.0 Supercomputer (768 GPUs) in double
precision.",
acknowledgement = ack-nhfb,
articleno = "44",
}
@Article{OBroin:2012:OIS,
author = "Cathal {{\'O} Broin} and L. A. A. Nikolopoulos",
title = "An {OpenCL} implementation for the solution of the
time-dependent {Schr{\"o}dinger} equation on {GPUs} and
{CPUs}",
journal = j-COMP-PHYS-COMM,
volume = "183",
number = "10",
pages = "2071--2080",
month = oct,
year = "2012",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/j.cpc.2012.05.009",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Thu Jun 28 15:53:26 MDT 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465512001774",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Oh:2012:MOO,
author = "Kwang Jin Oh and Ji Hoon Kang and Hun Joo Myung",
title = "{mm\_par2.0}: An object-oriented molecular dynamics
simulation program parallelized using a hierarchical
scheme with {MPI} and {OPENMP}",
journal = j-COMP-PHYS-COMM,
volume = "183",
number = "2",
pages = "440--441",
month = feb,
year = "2012",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/j.cpc.2011.08.023",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Sat Feb 11 10:11:01 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465511003407",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Oliveira:2012:CCO,
author = "Rafael Sachetto Oliveira and Bernardo Martins Rocha
and Ronan Mendon{\c{c}}a Amorim",
title = "Comparing {CUDA}, {OpenCL} and {OpenGL}
Implementations of the Cardiac Monodomain Equations",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7204",
pages = "111--120",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-31500-8_12",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:26:14 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012c.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-31500-8_12/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-31500-8",
book-URL = "http://www.springerlink.com/content/978-3-642-31500-8",
fjournal = "Lecture Notes in Computer Science",
}
@InProceedings{Olivier:2012:CMW,
author = "Stephen L. Olivier and Bronis R. de Supinski and
Martin Schulz and Jan F. Prins",
title = "Characterizing and mitigating work time inflation in
task parallel programs",
crossref = "Hollingsworth:2012:SPI",
pages = "65:1--65:??",
year = "2012",
bibdate = "Thu Nov 15 07:38:35 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
URL = "http://conferences.computer.org/sc/2012/papers/1000a066.pdf",
abstract = "Task parallelism raises the level of abstraction in
shared memory parallel programming to simplify the
development of complex applications. However, task
parallel applications can exhibit poor performance due
to thread idleness, scheduling overheads, and work time
inflation --- additional time spent by threads in a
multithreaded computation beyond the time required to
perform the same work in a sequential computation. We
identify the contributions of each factor to lost
efficiency in various task parallel OpenMP applications
and diagnose the causes of work time inflation in those
applications. Increased data access latency can cause
significant work time inflation in NUMA systems. Our
locality framework for task parallel OpenMP programs
mitigates this cause of work time inflation. Our
extensions to the Qthreads library demonstrate that
locality-aware scheduling can improve performance up to
3X compared to the Intel OpenMP task scheduler.",
acknowledgement = ack-nhfb,
articleno = "65",
}
@Article{Olivier:2012:OTS,
author = "Stephen L. Olivier and Allan K. Porterfield and Kyle
B. Wheeler and Michael Spiegel and Jan F. Prins",
title = "{OpenMP} task scheduling strategies for multicore
{NUMA} systems",
journal = j-IJHPCA,
volume = "26",
number = "2",
pages = "110--124",
month = may,
year = "2012",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342011434065",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Thu Nov 8 11:31:13 MST 2012",
bibsource = "http://hpc.sagepub.com/content/26/2.toc;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/26/2/110.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
onlinedate = "February 7, 2012",
}
@article{Perla:2012:PAH,
    author = {Francesca Perla and Paolo Zanetti},
    title = {Performance Analysis of an Hybrid {MPI\slash OpenMP}
        {ALM} Software for Life Insurance Policies on
        Multi-core Architectures},
    journal = j-LECT-NOTES-COMP-SCI,
    volume = {7312},
    pages = {250--253},
    year = {2012},
    coden = {LNCSD9},
    doi = {https://doi.org/10.1007/978-3-642-30961-8_19},
    issn = {0302-9743 (print), 1611-3349 (electronic)},
    issn-l = {0302-9743},
    bibdate = {Wed Dec 19 15:19:49 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
    url = {http://link.springer.com/chapter/10.1007/978-3-642-30961-8_19/},
    acknowledgement = ack-nhfb,
    book-doi = {https://doi.org/10.1007/978-3-642-30961-8},
    book-url = {http://www.springerlink.com/content/978-3-642-30961-8},
    fjournal = {Lecture Notes in Computer Science},
}
@inproceedings{Preissl:2012:CSS,
    author = {Robert Preissl and Theodore M. Wong and Pallab Datta
        and Myron Flickner and Raghavendra Singh and Steven K.
        Esser and William P. Risk and Horst D. Simon and
        Dharmendra S. Modha},
    title = {{Compass}: a scalable simulator for an architecture
        for cognitive computing},
    crossref = {Hollingsworth:2012:SPI},
    pages = {54:1--54:??},
    year = {2012},
    bibdate = {Thu Nov 15 07:38:35 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
        http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib},
    url = {http://conferences.computer.org/sc/2012/papers/1000a085.pdf},
    abstract = {Inspired by the function, power, and volume of the
        organic brain, we are developing TrueNorth, a novel
        modular, non-von Neumann, ultra-low power, compact
        architecture. TrueNorth consists of a scalable network
        of neurosynaptic cores, with each core containing
        neurons, dendrites, synapses, and axons. To set sail
        for TrueNorth, we developed Compass, a multi-threaded,
        massively parallel functional simulator and a parallel
        compiler that maps a network of long-distance pathways
        in the macaque monkey brain to TrueNorth. We
        demonstrate near-perfect weak scaling on a 16 rack
        IBM\reg{} Blue Gene\reg{}/Q (262144 CPUs, 256 TB
        memory), achieving an unprecedented scale of 256
        million neurosynaptic cores containing 65 billion
        neurons and 16 trillion synapses running only 388X
        slower than real time with an average spiking rate of
        8.1 Hz. By using emerging PGAS communication
        primitives, we also demonstrate 2X better real-time
        performance over MPI primitives on a 4 rack Blue Gene/P
        (16384 CPUs, 16 TB memory).},
    acknowledgement = ack-nhfb,
    articleno = {54},
}
@article{Puzniakowski:2012:TOI,
    author = {Tadeusz Pu{\'z}niakowski and Marek A. Bednarczyk},
    title = {Towards an {OpenCL} Implementation of `Genetic
        Algorithms' on {GPUs}},
    journal = j-LECT-NOTES-COMP-SCI,
    volume = {7053},
    pages = {190--203},
    year = {2012},
    coden = {LNCSD9},
    doi = {https://doi.org/10.1007/978-3-642-25261-7_15},
    issn = {0302-9743 (print), 1611-3349 (electronic)},
    issn-l = {0302-9743},
    bibdate = {Wed Dec 19 15:23:16 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012a.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
    url = {http://link.springer.com/chapter/10.1007/978-3-642-25261-7_15/},
    acknowledgement = ack-nhfb,
    book-doi = {https://doi.org/10.1007/978-3-642-25261-7},
    book-url = {http://www.springerlink.com/content/978-3-642-25261-7},
    fjournal = {Lecture Notes in Computer Science},
}
@Article{Qiu:2012:PWM,
author = "Judy Qiu and Seung-Hee Bae",
title = "Performance of {Windows} multicore systems on threading
and {MPI}",
journal = j-CCPE,
volume = "24",
number = "1",
pages = "14--28",
month = jan,
year = "2012",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.1762",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Mon Jan 16 12:11:17 MST 2012",
bibsource = "http://www.interscience.wiley.com/jpages/1532-0626;
http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Prac\-tice and
Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "23 May 2011",
}
@inproceedings{Rietmann:2012:FAS,
    author = {Max Rietmann and Peter Messmer and Tarje Nissen-Meyer
        and Daniel Peter and Piero Basini and Dimitri
        Komatitsch and Olaf Schenk and Jeroen Tromp and Lapo
        Boschi and Domenico Giardini},
    title = {Forward and adjoint simulations of seismic wave
        propagation on emerging large-scale {GPU}
        architectures},
    crossref = {Hollingsworth:2012:SPI},
    pages = {38:1--38:??},
    year = {2012},
    bibdate = {Thu Nov 15 07:38:35 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
        http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib},
    url = {http://conferences.computer.org/sc/2012/papers/1000a104.pdf},
    abstract = {Computational seismology is an area of wide
        sociological and economic impact, ranging from
        earthquake risk assessment to subsurface imaging and
        oil and gas exploration. At the core of these
        simulations is the modeling of wave propagation in a
        complex medium. Here we report on the extension of the
        high-order finite-element seismic wave simulation
        package SPECFEM3D to support the largest scale hybrid
        and homogeneous supercomputers. Starting from an
        existing highly tuned MPI code, we migrated to a CUDA
        version. In order to be of immediate impact to the
        science mission of computational seismologists, we had
        to port the entire production package, rather than just
        individual kernels. One of the challenges in
        parallelizing finite element codes is the potential for
        race conditions during the assembly phase. We therefore
        investigated different methods such as mesh coloring or
        atomic updates on the GPU. In order to achieve strong
        scaling, we needed to ensure good overlap of data
        motion at all levels, including internode and
        host-accelerator transfers. Finally we carefully tuned
        the GPU implementation. The new MPI/CUDA solver
        exhibits excellent scalability and achieves speedup on
        a node-to-node basis over the carefully tuned
        equivalent multi-core MPI solver. To demonstrate the
        performance of both the forward and adjoint
        functionality, we present two case studies run on the
        Cray XE6 CPU and Cray XK6 GPU architectures up to 896
        nodes: (1) focusing on most commonly used forward
        simulations, we simulate seismic wave propagation
        generated by earthquakes in Turkey, and (2) testing the
        most complex seismic inversion type of the package, we
        use ambient seismic noise to image 3-D crust and mantle
        structure beneath western Europe.},
    acknowledgement = ack-nhfb,
    articleno = {38},
}
@article{Royuela:2012:ASO,
    author = {Sara Royuela and Alejandro Duran and Chunhua Liao and
        Daniel J. Quinlan},
    title = {Auto-scoping for {OpenMP} Tasks},
    journal = j-LECT-NOTES-COMP-SCI,
    volume = {7312},
    pages = {29--43},
    year = {2012},
    coden = {LNCSD9},
    doi = {https://doi.org/10.1007/978-3-642-30961-8_3},
    issn = {0302-9743 (print), 1611-3349 (electronic)},
    issn-l = {0302-9743},
    bibdate = {Wed Dec 19 15:19:49 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
    url = {http://link.springer.com/chapter/10.1007/978-3-642-30961-8_3/},
    acknowledgement = ack-nhfb,
    book-doi = {https://doi.org/10.1007/978-3-642-30961-8},
    book-url = {http://www.springerlink.com/content/978-3-642-30961-8},
    fjournal = {Lecture Notes in Computer Science},
}
@article{Rubio-Largo:2012:UMO,
    author = {{\'A}lvaro Rubio-Largo and Miguel A.
        Vega-Rodr{\'\i}guez and Juan A. G{\'o}mez-Pulido},
    title = {Using a Multiobjective {OpenMP+MPI DE} for the Static
        {RWA} Problem},
    journal = j-LECT-NOTES-COMP-SCI,
    volume = {6927},
    pages = {224--231},
    year = {2012},
    coden = {LNCSD9},
    doi = {https://doi.org/10.1007/978-3-642-27549-4_29},
    issn = {0302-9743 (print), 1611-3349 (electronic)},
    issn-l = {0302-9743},
    bibdate = {Mon Dec 24 07:13:54 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012a.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
    url = {http://link.springer.com/content/pdf/10.1007/978-3-642-27549-4_29},
    acknowledgement = ack-nhfb,
    book-doi = {https://doi.org/10.1007/978-3-642-27549-4},
    book-url = {http://www.springerlink.com/content/978-3-642-27549-4},
    fjournal = {Lecture Notes in Computer Science},
}
@article{Sabne:2012:ECO,
    author = {Amit Sabne and Putt Sakdhnagool and Rudolf Eigenmann},
    title = {Effects of Compiler Optimizations in {OpenMP} to
        {CUDA} Translation},
    journal = j-LECT-NOTES-COMP-SCI,
    volume = {7312},
    pages = {169--181},
    year = {2012},
    coden = {LNCSD9},
    doi = {https://doi.org/10.1007/978-3-642-30961-8_13},
    issn = {0302-9743 (print), 1611-3349 (electronic)},
    issn-l = {0302-9743},
    bibdate = {Wed Dec 19 15:19:49 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
    url = {http://link.springer.com/chapter/10.1007/978-3-642-30961-8_13/},
    acknowledgement = ack-nhfb,
    book-doi = {https://doi.org/10.1007/978-3-642-30961-8},
    book-url = {http://www.springerlink.com/content/978-3-642-30961-8},
    fjournal = {Lecture Notes in Computer Science},
}
@article{Samadi:2012:AIA,
    author = {Mehrzad Samadi and Amir Hormati and Mojtaba Mehrara
        and Janghaeng Lee and Scott Mahlke},
    title = {Adaptive input-aware compilation for graphics
        engines},
    journal = j-SIGPLAN,
    volume = {47},
    number = {6},
    pages = {13--22},
    month = jun,
    year = {2012},
    coden = {SINODQ},
    doi = {https://doi.org/10.1145/2345156.2254067},
    issn = {0362-1340 (print), 1523-2867 (print), 1558-1160
        (electronic)},
    issn-l = {0362-1340},
    bibdate = {Mon Aug 6 16:31:49 MDT 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
        http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
    note = {PLDI '12 proceedings.},
    abstract = {While graphics processing units (GPUs) provide
        low-cost and efficient platforms for accelerating high
        performance computations, the tedious process of
        performance tuning required to optimize applications is
        an obstacle to wider adoption of GPUs. In addition to
        the programmability challenges posed by GPU's complex
        memory hierarchy and parallelism model, a well-known
        application design problem is target portability across
        different GPUs. However, even for a single GPU target,
        changing a program's input characteristics can make an
        already-optimized implementation of a program perform
        poorly. In this work, we propose Adaptic, an adaptive
        input-aware compilation system to tackle this
        important, yet overlooked, input portability problem.
        Using this system, programmers develop their
        applications in a high-level streaming language and let
        Adaptic undertake the difficult task of input portable
        optimizations and code generation. Several input-aware
        optimizations are introduced to make efficient use of
        the memory hierarchy and customize thread composition.
        At runtime, a properly optimized version of the
        application is executed based on the actual program
        input. We perform a head-to-head comparison between the
        Adaptic generated and hand-optimized CUDA programs. The
        results show that Adaptic is capable of generating
        codes that can perform on par with their hand-optimized
        counterparts over certain input ranges and outperform
        them when the input falls out of the hand-optimized
        programs' ``comfort zone''. Furthermore, we show that
        input-aware results are sustainable across different
        GPU targets making it possible to write and optimize
        applications once and run them anywhere.},
    acknowledgement = ack-nhfb,
    fjournal = {ACM SIGPLAN Notices},
    journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
}
@article{Santos:2012:ICC,
    author = {Bruno F. L. Santos and Hendrik T. Macedo},
    title = {Improving {CUDA{\TM} C\slash C++} encoding readability
        to foster parallel application development},
    journal = j-SIGSOFT,
    volume = {37},
    number = {1},
    pages = {1--5},
    month = jan,
    year = {2012},
    coden = {SFENDP},
    doi = {https://doi.org/10.1145/2088883.2088897},
    issn = {0163-5948 (print), 1943-5843 (electronic)},
    issn-l = {0163-5948},
    bibdate = {Wed Aug 1 17:16:09 MDT 2018},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
        http://www.math.utah.edu/pub/tex/bib/sigsoft2010.bib},
    abstract = {Graphical Processing Units (GPUs) have recently been
        used to enable parallel application development. The
        most prominent initiative has been provided by
        NVIDIA{\TM} with the so-called CUDA{\TM} architecture,
        designed to GeForce{\TM} graphic cards. However, even
        with CUDA C-like programming language, parallel
        codification remains somewhat awkward if compared to
        sequential codification. The programmer still has to
        deal with low-level hardware details such as generation
        and synchronization of threads and GPU tracks and
        sectors. In this paper, we propose a
        programmer-friendly interface for CUDA-C programming,
        in such a way that most hardware details are hidden
        from the programmer. We show how code readability is
        improved without undermining parallel execution
        performance.},
    acknowledgement = ack-nhfb,
    fjournal = {ACM SIGSOFT Software Engineering Notes},
    journal-url = {https://dl.acm.org/citation.cfm?id=J728},
}
@article{Satake:2012:OGA,
    author = {Shin-ichi Satake and Hajime Yoshimori and Takayuki
        Suzuki},
    title = {Optimizations of a {GPU} accelerated heat conduction
        equation by a programming of {CUDA Fortran} from an
        analysis of a {PTX} file},
    journal = j-COMP-PHYS-COMM,
    volume = {183},
    number = {11},
    pages = {2376--2385},
    month = nov,
    year = {2012},
    coden = {CPHCBZ},
    doi = {https://doi.org/10.1016/j.cpc.2012.06.005},
    issn = {0010-4655 (print), 1879-2944 (electronic)},
    issn-l = {0010-4655},
    bibdate = {Fri Jul 27 07:00:54 MDT 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
    url = {http://www.sciencedirect.com/science/article/pii/S0010465512002068},
    acknowledgement = ack-nhfb,
    fjournal = {Computer Physics Communications},
    journal-url = {http://www.sciencedirect.com/science/journal/00104655},
}
@inproceedings{Schindewolf:2012:WSA,
    author = {Martin Schindewolf and Barna Bihari and John
        Gyllenhaal and Martin Schulz and Amy Wang and Wolfgang
        Karl},
    title = {What scientific applications can benefit from hardware
        transactional memory?},
    crossref = {Hollingsworth:2012:SPI},
    pages = {90:1--90:??},
    year = {2012},
    bibdate = {Thu Nov 15 07:38:35 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
        http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib},
    url = {http://conferences.computer.org/sc/2012/papers/1000a073.pdf},
    abstract = {Achieving efficient and correct synchronization of
        multiple threads is a difficult and error-prone task at
        small scale and, as we march towards extreme scale
        computing, will be even more challenging when the
        resulting application is supposed to utilize millions
        of cores efficiently. Transactional Memory (TM) is a
        promising technique to ease the burden on the
        programmer, but only recently has become available on
        commercial hardware in the new Blue Gene/Q system and
        hence the real benefit for realistic applications has
        not been studied yet. This paper presents the first
        performance results of TM embedded into OpenMP on a
        prototype system of BG/Q and characterizes code
        properties that will likely lead to benefits when
        augmented with TM primitives. We first study the
        influence of thread count, environment variables and
        memory layout on TM performance and identify code
        properties that will yield performance gains with TM.
        Second, we evaluate the combination of OpenMP with
        multiple synchronization primitives on top of MPI to
        determine suitable task to thread ratios per node.
        Finally, we condense our findings into a set of best
        practices. These are applied to a Monte Carlo Benchmark
        and a Smoothed Particle Hydrodynamics method. In both
        cases an optimized TM version, executed with 64 threads
        on one node, outperforms a simple TM implementation.
        MCB with optimized TM yields a speedup of 27.45 over
        baseline.},
    acknowledgement = ack-nhfb,
    articleno = {90},
}
@article{Schmidl:2012:PAT,
    author = {Dirk Schmidl and Peter Philippen and Daniel Lorenz and
        Christian R{\"o}ssel and Markus Geimer},
    title = {Performance Analysis Techniques for Task-Based
        {OpenMP} Applications},
    journal = j-LECT-NOTES-COMP-SCI,
    volume = {7312},
    pages = {196--209},
    year = {2012},
    coden = {LNCSD9},
    doi = {https://doi.org/10.1007/978-3-642-30961-8_15},
    issn = {0302-9743 (print), 1611-3349 (electronic)},
    issn-l = {0302-9743},
    bibdate = {Wed Dec 19 15:19:49 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
    url = {http://link.springer.com/chapter/10.1007/978-3-642-30961-8_15/},
    acknowledgement = ack-nhfb,
    book-doi = {https://doi.org/10.1007/978-3-642-30961-8},
    book-url = {http://www.springerlink.com/content/978-3-642-30961-8},
    fjournal = {Lecture Notes in Computer Science},
}
@article{Schneider:2012:MAC,
    author = {Timo Schneider and Robert Gerstenberger and Torsten
        Hoefler},
    title = {Micro-applications for Communication Data Access
        Patterns and {MPI} Datatypes},
    journal = j-LECT-NOTES-COMP-SCI,
    volume = {7490},
    pages = {121--131},
    year = {2012},
    coden = {LNCSD9},
    doi = {https://doi.org/10.1007/978-3-642-33518-1_17},
    issn = {0302-9743 (print), 1611-3349 (electronic)},
    issn-l = {0302-9743},
    bibdate = {Wed Dec 19 15:23:42 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
    url = {http://link.springer.com/chapter/10.1007/978-3-642-33518-1_17/},
    acknowledgement = ack-nhfb,
    book-doi = {https://doi.org/10.1007/978-3-642-33518-1},
    book-url = {http://www.springerlink.com/content/978-3-642-33518-1},
    fjournal = {Lecture Notes in Computer Science},
}
@article{Sehrish:2012:RFS,
    author = {Saba Sehrish and Jun Wang},
    title = {{Reduced Function Set Abstraction (RFSA)} for
        {MPI-IO}},
    journal = j-J-SUPERCOMPUTING,
    volume = {59},
    number = {1},
    pages = {131--146},
    month = jan,
    year = {2012},
    coden = {JOSUED},
    issn = {0920-8542 (print), 1573-0484 (electronic)},
    issn-l = {0920-8542},
    bibdate = {Tue Dec 13 15:25:33 MST 2011},
    bibsource = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=59&issue=1;
        http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
    url = {http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=59&issue=1&spage=131},
    acknowledgement = ack-nhfb,
    fjournal = {The Journal of Supercomputing},
    journal-url = {http://link.springer.com/journal/11227},
}
@article{Shan:2012:OAA,
    author = {Hongzhang Shan and Erich Strohmaier and James Amundson
        and Eric G. Stern},
    title = {Optimizing the Advanced Accelerator Simulation
        Framework {Synergia} Using {OpenMP}},
    journal = j-LECT-NOTES-COMP-SCI,
    volume = {7312},
    pages = {140--153},
    year = {2012},
    coden = {LNCSD9},
    doi = {https://doi.org/10.1007/978-3-642-30961-8_11},
    issn = {0302-9743 (print), 1611-3349 (electronic)},
    issn-l = {0302-9743},
    bibdate = {Wed Dec 19 15:19:49 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
    url = {http://link.springer.com/chapter/10.1007/978-3-642-30961-8_11/},
    acknowledgement = ack-nhfb,
    book-doi = {https://doi.org/10.1007/978-3-642-30961-8},
    book-url = {http://www.springerlink.com/content/978-3-642-30961-8},
    fjournal = {Lecture Notes in Computer Science},
}
@article{Shan:2012:PEH,
    author = {Hongzhang Shan and Nicholas J. Wright and John Shalf
        and Katherine Yelick and Marcus Wagner and Nathan
        Wichmann},
    title = {A preliminary evaluation of the hardware acceleration
        of the {Cray Gemini} interconnect for {PGAS} languages
        and comparison with {MPI}},
    journal = j-SIGMETRICS,
    volume = {40},
    number = {2},
    pages = {92--98},
    month = sep,
    year = {2012},
    coden = {????},
    doi = {https://doi.org/10.1145/2381056.2381077},
    issn = {0163-5999 (print), 1557-9484 (electronic)},
    issn-l = {0163-5999},
    bibdate = {Fri Nov 9 11:06:40 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
        http://www.math.utah.edu/pub/tex/bib/sigmetrics.bib},
    abstract = {The Gemini interconnect on the Cray XE6 platform
        provides for lightweight remote direct memory access
        (RDMA) between nodes, which is useful for implementing
        partitioned global address space (PGAS) languages like
        UPC and Co-Array Fortran. In this paper, we perform a
        study of Gemini performance using a set of
        communication microbenchmarks and compare the
        performance of one-sided communication in PGAS
        languages with two-sided MPI. Our results demonstrate
        the performance benefits of the PGAS model on Gemini
        hardware, showing in what circumstances and by how much
        one-sided communication outperforms two-sided in terms
        of messaging rate, aggregate bandwidth, and computation
        and communication overlap capability. For example, for
        8-byte and 2KB messages the one-sided messaging rate is
        5 and 10 times greater respectively than the two-sided
        one. The study also reveals important information about
        how to optimize one-sided Gemini communication.},
    acknowledgement = ack-nhfb,
    fjournal = {ACM SIGMETRICS Performance Evaluation Review},
    journal-url = {http://portal.acm.org/toc.cfm?id=J618},
}
@article{Sharma:2012:SRP,
    author = {Subodh Sharma and Ganesh Gopalakrishnan},
    title = {A Sound Reduction of Persistent-Sets for Deadlock
        Detection in {MPI} Applications},
    journal = j-LECT-NOTES-COMP-SCI,
    volume = {7498},
    pages = {194--209},
    year = {2012},
    coden = {LNCSD9},
    doi = {https://doi.org/10.1007/978-3-642-33296-8_15},
    issn = {0302-9743 (print), 1611-3349 (electronic)},
    issn-l = {0302-9743},
    bibdate = {Wed Dec 19 15:23:52 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib},
    url = {http://link.springer.com/chapter/10.1007/978-3-642-33296-8_15/},
    acknowledgement = ack-nhfb,
    book-doi = {https://doi.org/10.1007/978-3-642-33296-8},
    book-url = {http://www.springerlink.com/content/978-3-642-33296-8},
    fjournal = {Lecture Notes in Computer Science},
}
@article{Shi:2012:VGA,
    author = {Lin Shi and Hao Chen and Jianhua Sun and Kenli Li},
    title = {{vCUDA}: {GPU}-Accelerated High-Performance Computing
        in Virtual Machines},
    journal = j-IEEE-TRANS-COMPUT,
    volume = {61},
    number = {6},
    pages = {804--816},
    month = jun,
    year = {2012},
    coden = {ITCOB4},
    doi = {https://doi.org/10.1109/TC.2011.112},
    issn = {0018-9340 (print), 1557-9956 (electronic)},
    issn-l = {0018-9340},
    bibdate = {Fri Jul 27 08:32:31 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib;
        http://www.math.utah.edu/pub/tex/bib/super.bib},
    acknowledgement = ack-nhfb,
    fjournal = {IEEE Transactions on Computers},
    journal-url = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12},
}
@InProceedings{Speck:2012:MST,
author = "R. Speck and D. Ruprecht and R. Krause and M. Emmett
and M. Minion and M. Winkel and P. Gibbon",
title = "A massively space-time parallel {$N$}-body solver",
crossref = "Hollingsworth:2012:SPI",
pages = "92:1--92:??",
year = "2012",
bibdate = "Thu Nov 15 07:38:35 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
URL = "http://conferences.computer.org/sc/2012/papers/1000a083.pdf",
abstract = "We present a novel space-time parallel version of the
Barnes--Hut tree code PEPC using PFASST, the Parallel
Full Approximation Scheme in Space and Time. The naive
use of increasingly more processors for a fixed-size
N-body problem is prone to saturate as soon as the
number of unknowns per core becomes too small. To
overcome this intrinsic strong-scaling limit, we
introduce temporal parallelism on top of PEPC's
existing hybrid MPI/PThreads spatial decomposition.
Here, we use PFASST which is based on a combination of
the iterations of the parallel-in-time algorithm
parareal with the sweeps of spectral deferred
correction (SDC) schemes. By combining these sweeps
with multiple space-time discretization levels, PFASST
relaxes the theoretical bound on parallel efficiency in
parareal. We present results from runs on up to 262,144
cores on the IBM Blue Gene/P installation JUGENE,
demonstrating that the space-time parallel code
provides speedup beyond the saturation of the purely
space-parallel approach.",
acknowledgement = ack-nhfb,
articleno = "92",
}
@article{Steinberger:2012:SDS,
    author = {Markus Steinberger and Bernhard Kainz and Bernhard
        Kerbl and Stefan Hauswiesner and Michael Kenzel and
        Dieter Schmalstieg},
    title = {{Softshell}: dynamic scheduling on {GPUs}},
    journal = j-TOG,
    volume = {31},
    number = {6},
    pages = {161:1--161:??},
    month = nov,
    year = {2012},
    coden = {ATGRDF},
    doi = {https://doi.org/10.1145/2366145.2366180},
    issn = {0730-0301 (print), 1557-7368 (electronic)},
    issn-l = {0730-0301},
    bibdate = {Thu Nov 15 16:10:28 MST 2012},
    bibsource = {http://www.acm.org/pubs/contents/journals/tog/;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib;
        http://www.math.utah.edu/pub/tex/bib/tog.bib},
    abstract = {In this paper we present Softshell, a novel execution
        model for devices composed of multiple processing cores
        operating in a single instruction, multiple data
        fashion, such as graphics processing units (GPUs). The
        Softshell model is intuitive and more flexible than the
        kernel-based adaption of the stream processing model,
        which is currently the dominant model for general
        purpose GPU computation. Using the Softshell model,
        algorithms with a relatively low local degree of
        parallelism can execute efficiently on massively
        parallel architectures. Softshell has the following
        distinct advantages: (1) work can be dynamically issued
        directly on the device, eliminating the need for
        synchronization with an external source, i.e., the CPU;
        (2) its three-tier dynamic scheduler supports arbitrary
        scheduling strategies, including dynamic priorities and
        real-time scheduling; and (3) the user can influence,
        pause, and cancel work already submitted for parallel
        execution. The Softshell processing model thus brings
        capabilities to GPU architectures that were previously
        only known from operating-system designs and reserved
        for CPU programming. As a proof of our claims, we
        present a publicly available implementation of the
        Softshell processing model realized on top of CUDA. The
        benchmarks of this implementation demonstrate that our
        processing model is easy to use and also performs
        substantially better than the state-of-the-art
        kernel-based processing model for problems that have
        been difficult to parallelize in the past.},
    acknowledgement = ack-nhfb,
    articleno = {161},
    fjournal = {ACM Transactions on Graphics},
    journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J778},
}
@article{Strzodka:2012:DLO,
    author = {Robert Strzodka},
    title = {Data layout optimization for multi-valued containers
        in {OpenCL}},
    journal = j-J-PAR-DIST-COMP,
    volume = {72},
    number = {9},
    pages = {1073--1082},
    month = sep,
    year = {2012},
    coden = {JPDCER},
    doi = {https://doi.org/10.1016/j.jpdc.2011.10.012},
    issn = {0743-7315 (print), 1096-0848 (electronic)},
    issn-l = {0743-7315},
    bibdate = {Fri Jul 27 06:43:44 MDT 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
        http://www.math.utah.edu/pub/tex/bib/pvm.bib;
        http://www.sciencedirect.com/science/journal/07437315},
    url = {http://www.sciencedirect.com/science/article/pii/S0743731511002115},
    acknowledgement = ack-nhfb,
    fjournal = {Journal of Parallel and Distributed Computing},
    journal-url = {http://www.sciencedirect.com/science/journal/07437315},
}
@article{Su:2012:CPB,
    author = {ChunYi Su and Dong Li and Dimitrios S. Nikolopoulos
        and Matthew Grove and Kirk Cameron and Bronis R. de
        Supinski},
    title = {Critical path-based thread placement for {NUMA}
        systems},
    journal = j-SIGMETRICS,
    volume = {40},
    number = {2},
    pages = {106--112},
    month = sep,
    year = {2012},
    coden = {????},
    doi = {https://doi.org/10.1145/2381056.2381079},
    issn = {0163-5999 (print), 1557-9484 (electronic)},
    issn-l = {0163-5999},
    bibdate = {Fri Nov 9 11:06:40 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
        http://www.math.utah.edu/pub/tex/bib/sigmetrics.bib},
    abstract = {Multicore multiprocessors use a Non Uniform Memory
        Architecture (NUMA) to improve their scalability.
        However, NUMA introduces performance penalties due to
        remote memory accesses. Without efficiently managing
        data layout and thread mapping to cores, scientific
        applications may suffer performance loss, even if they
        are optimized for NUMA. In this paper, we present
        algorithms and a runtime system that optimize the
        execution of OpenMP applications on NUMA architectures.
        By collecting information from hardware counters, the
        runtime system directs thread placement and reduces
        performance penalties by minimizing the critical path
        of OpenMP parallel regions. The runtime system uses a
        scalable algorithm that derives placement decisions
        with negligible overhead. We evaluate our algorithms
        and the runtime system with four NPB applications
        implemented in OpenMP. On average the algorithms
        achieve between 8.13\% and 25.68\% performance
        improvement, compared to the default Linux thread
        placement scheme. The algorithms miss the optimal
        thread placement in only 8.9\% of the cases.},
    acknowledgement = ack-nhfb,
    fjournal = {ACM SIGMETRICS Performance Evaluation Review},
    journal-url = {http://portal.acm.org/toc.cfm?id=J618},
}
@inproceedings{Subramoni:2012:DSI,
    author = {H. Subramoni and S. Potluri and K. Kandalla and B.
        Barth and J. Vienne and J. Keasler and K. Tomko and K.
        Schulz and A. Moody and D. K. Panda},
    title = {Design of a scalable {InfiniBand} topology service to
        enable network-topology-aware placement of processes},
    crossref = {Hollingsworth:2012:SPI},
    pages = {70:1--70:??},
    year = {2012},
    bibdate = {Thu Nov 15 07:38:35 MST 2012},
    bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
        http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib},
    url = {http://conferences.computer.org/sc/2012/papers/1000a076.pdf},
    abstract = {Over the last decade, InfiniBand has become an
        increasingly popular interconnect for deploying modern
        super-computing systems. However, there exists no
        detection service that can discover the underlying
        network topology in a scalable manner and expose this
        information to runtime libraries and users of the high
        performance computing systems in a convenient way. In
        this paper, we design a novel and scalable method to
        detect the InfiniBand network topology by using
        Neighbor-Joining techniques (NJ). To the best of our
        knowledge, this is the first instance where the
        neighbor joining algorithm has been applied to solve
        the problem of detecting InfiniBand network topology.
        We also design a network-topology-aware MPI library
        that takes advantage of the network topology service.
        The library places processes taking part in the MPI job
        in a network-topology-aware manner with the dual aim of
        increasing intra-node communication and reducing the
        long distance inter-node communication across the
        InfiniBand fabric.},
    acknowledgement = ack-nhfb,
    articleno = {70},
}
@Article{Sumimoto:2012:MCL,
author = "Shinji Sumimoto",
title = "The {MPI Communication Library} for the {K} Computer:
Its Design and Implementation",
journal = j-LECT-NOTES-COMP-SCI,
volume = "7490",
pages = "11--11",
year = "2012",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-642-33518-1_3",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
bibdate = "Wed Dec 19 15:23:42 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_3/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-642-33518-1",
book-URL = "http://www.springerlink.com/content/978-3-642-33518-1",
fjournal = "Lecture Notes in Computer Science",
}
@Article{Tahan:2012:ITC,
  author          = "Oussama Tahan and Mats Brorsson and Mohamed Shawky",
  title           = "Introducing Task Cancellation to {OpenMP}",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "7312",
  pages           = "73--87",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-30961-8_6",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Wed Dec 19 15:19:49 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_6/",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL        = "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Tahan:2012:UDT,
  author          = "Oussama Tahan and Mohamed Shawky",
  title           = "Using Dynamic Task Level Redundancy for {OpenMP} Fault
                     Tolerance",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "7179",
  pages           = "25--36",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-28293-5_3",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Wed Dec 19 15:25:42 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012b.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/chapter/10.1007/978-3-642-28293-5_3/",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-28293-5",
  book-URL        = "http://www.springerlink.com/content/978-3-642-28293-5",
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Tao:2012:UGA,
  author          = "Jian Tao and Marek Blazewicz and Steven R. Brandt",
  title           = "Using {GPU}'s to accelerate stencil-based computation
                     kernels for the development of large scale scientific
                     applications on heterogeneous systems",
  journal         = j-SIGPLAN,
  volume          = "47",
  number          = "8",
  pages           = "287--288",
  month           = aug,
  year            = "2012",
  CODEN           = "SINODQ",
  DOI             = "https://doi.org/10.1145/2370036.2145857",
  ISSN            = "0362-1340 (print), 1523-2867 (print), 1558-1160
                     (electronic)",
  ISSN-L          = "0362-1340",
  bibdate         = "Wed Sep 12 12:11:57 MDT 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note            = "PPOPP '12 conference proceedings.",
  abstract        = "We present CaCUDA --- a GPGPU kernel abstraction and a
                     parallel programming framework for developing highly
                     efficient large scale scientific applications using
                     stencil computations on hybrid CPU/GPU architectures.
                     CaCUDA is built upon the Cactus computational toolkit,
                     an open source problem solving environment designed for
                     scientists and engineers. Due to the flexibility and
                     extensibility of the Cactus toolkit, the addition of a
                     GPGPU programming framework required no changes to the
                     Cactus infrastructure, guaranteeing that existing
                     features and modules will continue to work without
                     modification. CaCUDA was tested and benchmarked using a
                     3D CFD code based on a finite difference discretization
                     of Navier--Stokes equations.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGPLAN Notices",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Terboven:2012:AOT,
  author          = "Christian Terboven and Dirk Schmidl and Tim Cramer and
                     Dieter an Mey",
  title           = "Assessing {OpenMP} Tasking Implementations on {NUMA}
                     Architectures",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "7312",
  pages           = "182--195",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-30961-8_14",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Wed Dec 19 15:19:49 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_14/",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL        = "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Thibault:2012:AIF,
  author          = "Julien C. Thibault and Inanc Senocak",
  title           = "Accelerating incompressible flow computations with a
                     {Pthreads--CUDA} implementation on small-footprint
                     multi-{GPU} platforms",
  journal         = j-J-SUPERCOMPUTING,
  volume          = "59",
  number          = "2",
  pages           = "693--719",
  month           = feb,
  year            = "2012",
  CODEN           = "JOSUED",
  ISSN            = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L          = "0920-8542",
  bibdate         = "Fri Apr 6 17:44:43 MDT 2012",
  bibsource       = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=59&issue=2;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=59&issue=2&spage=693",
  acknowledgement = ack-nhfb,
  fjournal        = "The Journal of Supercomputing",
  journal-URL     = "http://link.springer.com/journal/11227",
}
@Article{Thoman:2012:AOL,
  author          = "Peter Thoman and Herbert Jordan and Simone Pellegrini
                     and Thomas Fahringer",
  title           = "Automatic {OpenMP} Loop Scheduling: a Combined
                     Compiler and Runtime Approach",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "7312",
  pages           = "88--101",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-30961-8_7",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Wed Dec 19 15:19:49 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_7/",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL        = "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal        = "Lecture Notes in Computer Science",
}
@InProceedings{Thorson:2012:SUF,
  author          = "Greg Thorson and Michael Woodacre",
  title           = "{SGI UV2}: a fused computation and data analysis
                     machine",
  crossref        = "Hollingsworth:2012:SPI",
  pages           = "105:1--105:??",
  year            = "2012",
  bibdate         = "Thu Nov 15 07:38:35 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib",
  URL             = "http://conferences.computer.org/sc/2012/papers/1000a080.pdf",
  abstract        = "UV2 is SGI's second generation data fusion system. UV2
                     was designed to meet the latest challenges facing users
                     in computation and data analysis. Its unique ability to
                     perform both functions on a single platform enables
                     efficient, easy to manage workflows. This platform has
                     a hybrid infrastructure, leveraging the latest
                     Intel\reg{} EP processors providing industry leading
                     computational power. Due to its high bandwidth,
                     extremely low latency NUMALink\reg{}6 (NL6)
                     interconnect, plus vectorized synchronization and data
                     movement, UV2 provides industry leading data intensive
                     capability. It supports a single operating system (OS)
                     image up to 64TB and 4K threads. Multiple OS images can
                     be deployed on a single NL6 fabric, which has a single
                     flat address space up to 8PB and 256K threads. These
                     capabilities allow for extreme performance on a broad
                     range of programming models and languages including
                     OpenMP[1], MPI, UPC[2], CAF[3] and SHMEM. The
                     architecture, implementation and performance of UV2 are
                     detailed.",
  acknowledgement = ack-nhfb,
  articleno       = "105",
}
@Article{Traff:2012:AUE,
  author          = "Jesper Larsson Tr{\"a}ff",
  title           = "Alternative, uniformly expressive and more scalable
                     interfaces for collective communication in {MPI}",
  journal         = j-PARALLEL-COMPUTING,
  volume          = "38",
  number          = "1--2",
  pages           = "26--36",
  month           = jan # "\slash " # feb,
  year            = "2012",
  CODEN           = "PACOEJ",
  DOI             = "https://doi.org/10.1016/j.parco.2011.10.009",
  ISSN            = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L          = "0167-8191",
  bibdate         = "Sat Feb 4 15:17:36 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0167819111001402",
  acknowledgement = ack-nhfb,
  fjournal        = "Parallel Computing",
  journal-URL     = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Traff:2012:MTM,
  author          = "Jesper Larsson Tr{\"a}ff",
  title           = "{{\tt mpicroscope}}: Towards an {MPI} Benchmark Tool
                     for Performance Guideline Verification",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "7490",
  pages           = "100--109",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-33518-1_15",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Wed Dec 19 15:23:42 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_15/",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-33518-1",
  book-URL        = "http://www.springerlink.com/content/978-3-642-33518-1",
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Tsutsui:2012:AMG,
  author          = "Shigeyoshi Tsutsui",
  title           = "{ACO} on Multiple {GPUs} with {CUDA} for Faster
                     Solution of {QAPs}",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "7492",
  pages           = "174--184",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-32964-7_18",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Wed Dec 19 15:23:44 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/chapter/10.1007/978-3-642-32964-7_18/",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-32964-7",
  book-URL        = "http://www.springerlink.com/content/978-3-642-32964-7",
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Tu:2012:PAO,
  author          = "Bibo Tu and Jianping Fan and Jianfeng Zhan and
                     Xiaofang Zhao",
  title           = "Performance analysis and optimization of {MPI}
                     collective operations on multi-core clusters",
  journal         = j-J-SUPERCOMPUTING,
  volume          = "60",
  number          = "1",
  pages           = "141--162",
  month           = apr,
  year            = "2012",
  CODEN           = "JOSUED",
  ISSN            = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L          = "0920-8542",
  bibdate         = "Fri Apr 6 17:45:24 MDT 2012",
  bibsource       = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=60&issue=1;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=60&issue=1&spage=141",
  acknowledgement = ack-nhfb,
  fjournal        = "The Journal of Supercomputing",
  journal-URL     = "http://link.springer.com/journal/11227",
}
@Article{Unat:2012:AFD,
  author          = "Didem Unat and Jun Zhou and Yifeng Cui and Scott B.
                     Baden and Xing Cai",
  title           = "Accelerating a {$3$D} Finite-Difference Earthquake
                     Simulation with a {C-to-CUDA} Translator",
  journal         = j-COMPUT-SCI-ENG,
  volume          = "14",
  number          = "3",
  pages           = "48--59",
  month           = may # "\slash " # jun,
  year            = "2012",
  CODEN           = "CSENFA",
  DOI             = "https://doi.org/10.1109/MCSE.2012.44",
  ISSN            = "1521-9615 (print), 1558-366X (electronic)",
  ISSN-L          = "1521-9615",
  bibdate         = "Thu Apr 26 17:01:57 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "Computing in Science and Engineering",
  journal-URL     = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}
@Article{Urena:2012:IMI,
  author          = "Isa{\'\i}as A. Compr{\'e}s Ure{\~n}a and Michael
                     Riepen and Michael Konow and Michael Gerndt",
  title           = "Invasive {MPI} on {Intel}'s Single-Chip Cloud
                     Computer",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "7179",
  pages           = "74--85",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-28293-5_7",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Wed Dec 19 15:25:42 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012b.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/chapter/10.1007/978-3-642-28293-5_7/",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-28293-5",
  book-URL        = "http://www.springerlink.com/content/978-3-642-28293-5",
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Wang:2012:OVT,
  author          = "Cheng Wang and Sunita Chandrasekaran and Barbara
                     Chapman",
  title           = "An {OpenMP 3.1} Validation Testsuite",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "7312",
  pages           = "237--249",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-30961-8_18",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Wed Dec 19 15:19:49 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012e.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/chapter/10.1007/978-3-642-30961-8_18/",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-30961-8",
  book-URL        = "http://www.springerlink.com/content/978-3-642-30961-8",
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Wei:2012:OLL,
  author          = "Zheng Wei and Joseph Jaja",
  title           = "Optimization of Linked List Prefix Computations on
                     Multithreaded {GPUs} Using {CUDA}",
  journal         = j-PARALLEL-PROCESS-LETT,
  volume          = "22",
  number          = "4",
  pages           = "1250012",
  month           = dec,
  year            = "2012",
  CODEN           = "PPLTEE",
  DOI             = "https://doi.org/10.1142/S0129626412500120",
  ISSN            = "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L          = "0129-6264",
  bibdate         = "Sat Jun 22 15:54:17 MDT 2013",
  bibsource       = "http://ejournals.wspc.com.sg/ppl/;
                     http://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "Parallel Processing Letters",
  journal-URL     = "http://www.worldscientific.com/loi/ppl",
}
@Article{Wu:2012:DPL,
  author          = "Chao-Chin Wu and Chao-Tung Yang and Kuan-Chou Lai and
                     Po-Hsun Chiu",
  title           = "Designing parallel loop self-scheduling schemes using
                     the hybrid {MPI} and {OpenMP} programming model for
                     multi-core grid systems",
  journal         = j-J-SUPERCOMPUTING,
  volume          = "59",
  number          = "1",
  pages           = "42--60",
  month           = jan,
  year            = "2012",
  CODEN           = "JOSUED",
  ISSN            = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L          = "0920-8542",
  bibdate         = "Tue Dec 13 15:25:33 MST 2011",
  bibsource       = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=59&issue=1;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=59&issue=1&spage=42",
  acknowledgement = ack-nhfb,
  fjournal        = "The Journal of Supercomputing",
  journal-URL     = "http://link.springer.com/journal/11227",
}
@Article{Wu:2012:PCH,
  author          = "Xingfu Wu and Valerie Taylor",
  title           = "Performance Characteristics of Hybrid {MPI\slash
                     OpenMP} Implementations of {NAS Parallel Benchmarks}
                     {SP} and {BT} on Large-Scale Multicore Clusters",
  journal         = j-COMP-J,
  volume          = "55",
  number          = "2",
  pages           = "154--167",
  month           = feb,
  year            = "2012",
  CODEN           = "CMPJA6",
  DOI             = "https://doi.org/10.1093/comjnl/bxr063",
  ISSN            = "0010-4620 (print), 1460-2067 (electronic)",
  ISSN-L          = "0010-4620",
  bibdate         = "Thu Feb 2 09:12:17 MST 2012",
  bibsource       = "http://comjnl.oxfordjournals.org/content/55/2.toc;
                     http://www.math.utah.edu/pub/tex/bib/compj2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://comjnl.oxfordjournals.org/content/55/2/154.full.pdf+html",
  acknowledgement = ack-nhfb,
  fjournal        = "Computer Journal",
  journal-URL     = "http://comjnl.oxfordjournals.org/",
  onlinedate      = "July 18, 2011",
}
@Article{Wu:2012:UHM,
  author          = "Chao-Chin Wu and Lien-Fu Lai and Chao-Tung Yang and
                     Po-Hsun Chiu",
  title           = "Using hybrid {MPI} and {OpenMP} programming to
                     optimize communications in parallel loop
                     self-scheduling schemes for multicore {PC} clusters",
  journal         = j-J-SUPERCOMPUTING,
  volume          = "60",
  number          = "1",
  pages           = "31--61",
  month           = apr,
  year            = "2012",
  CODEN           = "JOSUED",
  ISSN            = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L          = "0920-8542",
  bibdate         = "Fri Apr 6 17:45:24 MDT 2012",
  bibsource       = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=60&issue=1;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=60&issue=1&spage=31",
  acknowledgement = ack-nhfb,
  fjournal        = "The Journal of Supercomputing",
  journal-URL     = "http://link.springer.com/journal/11227",
}
@Article{Yoshinaga:2012:DBM,
  author          = "Kazumi Yoshinaga and Yuichi Tsujita and Atsushi Hori
                     and Mikiko Sato and Mitaro Namiki",
  title           = "Delegation-Based {MPI} Communications for a Hybrid
                     Parallel Computer with Many-Core Architecture",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "7490",
  pages           = "47--56",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-33518-1_10",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Wed Dec 19 15:23:42 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_10/",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-33518-1",
  book-URL        = "http://www.springerlink.com/content/978-3-642-33518-1",
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Yu:2012:SCC,
  author          = "Fang Yu and Shun-Ching Yang and Farn Wang and
                     Guan-Cheng Chen and Che-Chang Chan",
  title           = "Symbolic consistency checking of {OpenMP} parallel
                     programs",
  journal         = j-SIGPLAN,
  volume          = "47",
  number          = "5",
  pages           = "139--148",
  month           = may,
  year            = "2012",
  CODEN           = "SINODQ",
  DOI             = "https://doi.org/10.1145/2345141.2248438",
  ISSN            = "0362-1340 (print), 1523-2867 (print), 1558-1160
                     (electronic)",
  ISSN-L          = "0362-1340",
  bibdate         = "Mon Aug 6 16:31:46 MDT 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note            = "LCTES '12 proceedings.",
  abstract        = "We present a symbolic approach for checking
                     consistency of OpenMP parallel programs. A parallel
                     program is consistent if it yields the same result as
                     its sequential version despite the execution order
                     among threads. We find race conditions of an OpenMP
                     parallel program, construct the formal model of its
                     raced segments under relaxed memory models, and perform
                     guided symbolic simulation to search consistency
                     violations. The simulation terminates when (1) a
                     witness has been found (the program is inconsistent),
                     or (2) all reachable states have been explored (the
                     program is consistent). We have developed the tool
                     Pathg by incorporating Omega library to solve race
                     constraints and Red symbolic simulator to perform
                     guided search. We show that Pathg can prove consistency
                     of programs, identify races that modern OpenMP checkers
                     failed to report, and find inconsistency witnesses
                     effectively against benchmarks from the OpenMP Source
                     Code Repository and the NAS Parallel benchmark suite.",
  acknowledgement = ack-nhfb,
  fjournal        = "ACM SIGPLAN Notices",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Yuan:2012:PCS,
  author          = "Zhiyong Yuan and Weixin Si and Xiangyun Liao and
                     Zhaoliang Duan and Yihua Ding and Jianhui Zhao",
  title           = "Parallel computing of {$3$D} smoking simulation based
                     on {OpenCL} heterogeneous platform",
  journal         = j-J-SUPERCOMPUTING,
  volume          = "61",
  number          = "1",
  pages           = "84--102",
  month           = jul,
  year            = "2012",
  CODEN           = "JOSUED",
  DOI             = "https://doi.org/10.1007/s11227-011-0652-y",
  ISSN            = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L          = "0920-8542",
  bibdate         = "Fri Oct 26 07:41:32 MDT 2012",
  bibsource       = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=61&issue=1;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.springerlink.com/openurl.asp?genre=article&issn=0920-8542&volume=61&issue=1&spage=84",
  acknowledgement = ack-nhfb,
  fjournal        = "The Journal of Supercomputing",
  journal-URL     = "http://link.springer.com/journal/11227",
}
@Article{Zahavi:2012:FTR,
  author          = "Eitan Zahavi",
  title           = "Fat-tree routing and node ordering providing
                     contention free traffic for {MPI} global collectives",
  journal         = j-J-PAR-DIST-COMP,
  volume          = "72",
  number          = "11",
  pages           = "1423--1432",
  month           = nov,
  year            = "2012",
  CODEN           = "JPDCER",
  DOI             = "https://doi.org/10.1016/j.jpdc.2012.01.018",
  ISSN            = "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L          = "0743-7315",
  bibdate         = "Wed Sep 12 12:11:36 MDT 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.sciencedirect.com/science/journal/07437315",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0743731512000305",
  acknowledgement = ack-nhfb,
  fjournal        = "Journal of Parallel and Distributed Computing",
  journal-URL     = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Zhao:2012:ASO,
  author          = "Xin Zhao and Gopalakrishnan Santhanaraman and William
                     Gropp",
  title           = "Adaptive Strategy for One-Sided Communication in
                     {MPICH2}",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "7490",
  pages           = "16--26",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-33518-1_7",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Wed Dec 19 15:23:42 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012h.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/chapter/10.1007/978-3-642-33518-1_7/",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-33518-1",
  book-URL        = "http://www.springerlink.com/content/978-3-642-33518-1",
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Zhou:2012:DFD,
  author          = "Xu Zhou and Kai Lu and Xicheng Lu and Xiaoping Wang
                     and Baohua Fan",
  title           = "{dMPI}: Facilitating Debugging of {MPI} Programs via
                     Deterministic Message Passing",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "7513",
  pages           = "172--179",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-35606-3_20",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Wed Dec 19 15:24:06 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012i.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/chapter/10.1007/978-3-642-35606-3_20/",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-35606-3",
  book-URL        = "http://www.springerlink.com/content/978-3-642-35606-3",
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Zhu:2012:CDS,
  author          = "Ke Zhu and Matthias Butenuth and Pablo d'Angelo",
  title           = "Comparison of Dense Stereo Using {CUDA}",
  journal         = j-LECT-NOTES-COMP-SCI,
  volume          = "6554",
  pages           = "398--410",
  year            = "2012",
  CODEN           = "LNCSD9",
  DOI             = "https://doi.org/10.1007/978-3-642-35740-4_31",
  ISSN            = "0302-9743 (print), 1611-3349 (electronic)",
  ISSN-L          = "0302-9743",
  bibdate         = "Mon Dec 24 08:20:14 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/lncs2012a.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/content/pdf/10.1007/978-3-642-35740-4_31",
  acknowledgement = ack-nhfb,
  book-DOI        = "https://doi.org/10.1007/978-3-642-35740-4",
  book-URL        = "http://www.springerlink.com/content/978-3-642-35740-4",
  fjournal        = "Lecture Notes in Computer Science",
}
@Article{Augusto:2013:APG,
  author          = "Douglas A. Augusto and Helio J. C. Barbosa",
  title           = "Accelerated parallel genetic programming tree
                     evaluation with {OpenCL}",
  journal         = j-J-PAR-DIST-COMP,
  volume          = "73",
  number          = "1",
  pages           = "86--100",
  month           = jan,
  year            = "2013",
  CODEN           = "JPDCER",
  DOI             = "https://doi.org/10.1016/j.jpdc.2012.01.012",
  ISSN            = "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L          = "0743-7315",
  bibdate         = "Sat Nov 17 07:06:13 MST 2012",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.sciencedirect.com/science/journal/07437315",
  URL             = "http://www.sciencedirect.com/science/article/pii/S074373151200024X",
  acknowledgement = ack-nhfb,
  fjournal        = "Journal of Parallel and Distributed Computing",
  journal-URL     = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Bach:2013:LQB,
  author          = "Matthias Bach and Volker Lindenstruth and Owe
                     Philipsen and Christopher Pinke",
  title           = "{Lattice QCD} based on {OpenCL}",
  journal         = j-COMP-PHYS-COMM,
  volume          = "184",
  number          = "9",
  pages           = "2042--2052",
  month           = sep,
  year            = "2013",
  CODEN           = "CPHCBZ",
  ISSN            = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L          = "0010-4655",
  bibdate         = "Mon Aug 26 14:34:22 MDT 2013",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0010465513001288",
  acknowledgement = ack-nhfb,
  fjournal        = "Computer Physics Communications",
  journal-URL     = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Bai:2013:SLA,
  author          = "Mingze Bai and Shixin Sun and Hong Tang and Yusheng
                     Dou and Glenn V. Lo",
  title           = "An {SPMD}-Like Algorithm for Parallelizing Molecular
                     Dynamics Using {OpenMP}",
  journal         = j-COMPUT-SCI-ENG,
  volume          = "15",
  number          = "4",
  pages           = "48--56",
  month           = jul # "\slash " # aug,
  year            = "2013",
  CODEN           = "CSENFA",
  DOI             = "https://doi.org/10.1109/MCSE.2012.66",
  ISSN            = "1521-9615 (print), 1558-366X (electronic)",
  ISSN-L          = "1521-9615",
  bibdate         = "Tue Dec 3 15:39:06 2013",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "Computing in Science and Engineering",
  journal-URL     = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}
@Article{Barkati:2013:SPA,
  author          = "Karim Barkati and Pierre Jouvelot",
  title           = "Synchronous programming in audio processing: a lookup
                     table oscillator case study",
  journal         = j-COMP-SURV,
  volume          = "46",
  number          = "2",
  pages           = "24:1--24:??",
  month           = nov,
  year            = "2013",
  CODEN           = "CMSVAN",
  DOI             = "https://doi.org/10.1145/2543581.2543591",
  ISSN            = "0360-0300 (print), 1557-7341 (electronic)",
  ISSN-L          = "0360-0300",
  bibdate         = "Thu Feb 6 07:35:29 MST 2014",
  bibsource       = "http://www.acm.org/pubs/contents/journals/surveys/;
                     http://www.math.utah.edu/pub/tex/bib/compsurv.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract        = "The adequacy of a programming language to a given
                     software project or application domain is often
                     considered a key factor of success in software
                     development and engineering, even though little
                     theoretical or practical information is readily
                     available to help make an informed decision. In this
                     article, we address a particular version of this issue
                     by comparing the adequacy of general-purpose
                     synchronous programming languages to more
                     Domain-Specific Languages (DSLs) in the field of
                     computer music. More precisely, we implemented and
                     tested the same lookup table oscillator example
                     program, one of the most classical algorithms for sound
                     synthesis, using a selection of significant synchronous
                     programming languages, half of which designed as
                     specific music languages --- Csound, Pure Data,
                     SuperCollider, ChucK, Faust --- and the other half being
                     general synchronous formalisms --- Signal, Lustre, Esterel,
                     Lucid Synchrone and C with the OpenMP Stream Extension
                     (Matlab/Octave is used for the initial specification).
                     The advantages of these two approaches are discussed,
                     providing insights to language designers and possibly
                     software developers of both communities regarding
                     programming languages design for the audio domain.",
  acknowledgement = ack-nhfb,
  articleno       = "24",
  fjournal        = "ACM Computing Surveys",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J204",
}
@Article{Berka:2013:CPC,
  author          = "Tobias Berka and Giorgos Kollias and Helge Hagenauer
                     and Marian Vajter{\v{s}}ic and Ananth Grama",
  title           = "Concurrent programming constructs for parallel {MPI}
                     applications",
  journal         = j-J-SUPERCOMPUTING,
  volume          = "63",
  number          = "2",
  pages           = "385--406",
  month           = feb,
  year            = "2013",
  CODEN           = "JOSUED",
  DOI             = "https://doi.org/10.1007/s11227-011-0739-5",
  ISSN            = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L          = "0920-8542",
  bibdate         = "Mon Apr 1 14:50:44 MDT 2013",
  bibsource       = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=63&issue=2;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/article/10.1007/s11227-011-0739-5",
  acknowledgement = ack-nhfb,
  fjournal        = "The Journal of Supercomputing",
  journal-URL     = "http://link.springer.com/journal/11227",
}
@Article{Bland:2013:EUL,
  author          = "Wesley Bland and Aurelien Bouteiller and Thomas
                     Herault and Joshua Hursey and others",
  title           = "An evaluation of {User-Level Failure Mitigation}
                     support in {MPI}",
  journal         = j-COMPUTING,
  volume          = "95",
  number          = "12",
  pages           = "1171--1184",
  month           = dec,
  year            = "2013",
  CODEN           = "CMPTA2",
  DOI             = "https://doi.org/10.1007/s00607-013-0331-3",
  ISSN            = "0010-485X (print), 1436-5057 (electronic)",
  ISSN-L          = "0010-485X",
  bibdate         = "Wed Jan 29 10:10:11 MST 2014",
  bibsource       = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0010-485X&volume=95&issue=12;
                     http://www.math.utah.edu/pub/tex/bib/computing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/article/10.1007/s00607-013-0331-3",
  acknowledgement = ack-nhfb,
  fjournal        = "Computing",
  journal-URL     = "http://link.springer.com/journal/607",
}
@Article{Bland:2013:PFR,
  author          = "Wesley Bland and Aurelien Bouteiller and Thomas
                     Herault and George Bosilca and Jack Dongarra",
  title           = "Post-failure recovery of {MPI} communication
                     capability: Design and rationale",
  journal         = j-IJHPCA,
  volume          = "27",
  number          = "3",
  pages           = "244--254",
  month           = aug,
  year            = "2013",
  CODEN           = "IHPCFL",
  DOI             = "https://doi.org/10.1177/1094342013488238",
  ISSN            = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L          = "1094-3420",
  bibdate         = "Fri Mar 14 15:39:55 MDT 2014",
  bibsource       = "http://hpc.sagepub.com/content/27/3.toc;
                     http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://hpc.sagepub.com/content/27/3/244.full.pdf+html",
  acknowledgement = ack-nhfb,
  fjournal        = "International Journal of High Performance Computing
                     Applications",
  journal-URL     = "http://hpc.sagepub.com/content/by/year",
  onlinedate      = "June 3, 2013",
}
@Article{Bland:2013:SIP,
  author          = "Wesley Bland and Peng Du and Aurelien Bouteiller and
                     Thomas Herault and George Bosilca and Jack J.
                     Dongarra",
  title           = "Special Issue Papers: Extending the scope of the
                     {Checkpoint-on-Failure} protocol for forward recovery
                     in standard {MPI}",
  journal         = j-CCPE,
  volume          = "25",
  number          = "17",
  pages           = "2381--2393",
  day             = "10",
  month           = dec,
  year            = "2013",
  CODEN           = "CCPEBO",
  DOI             = "https://doi.org/10.1002/cpe.3100",
  ISSN            = "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L          = "1532-0626",
  bibdate         = "Tue Dec 3 10:37:48 MST 2013",
  bibsource       = "http://www.interscience.wiley.com/jpages/1532-0626;
                     http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "Concurrency and Computation: Practice and Experience",
  journal-URL     = "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate      = "23 Jul 2013",
}
@Article{Buyukkececi:2013:POI,
  author          = "Ferit B{\"u}y{\"u}kke{\c{c}}eci and Omar Awile and Ivo
                     F. Sbalzarini",
  title           = "A portable {OpenCL} implementation of generic
                     particle-mesh and mesh-particle interpolation in {$2$D}
                     and {$3$D}",
  journal         = j-PARALLEL-COMPUTING,
  volume          = "39",
  number          = "2",
  pages           = "94--111",
  month           = feb,
  year            = "2013",
  CODEN           = "PACOEJ",
  DOI             = "https://doi.org/10.1016/j.parco.2012.12.001",
  ISSN            = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L          = "0167-8191",
  bibdate         = "Thu Feb 28 07:26:40 MST 2013",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0167819112000920",
  acknowledgement = ack-nhfb,
  fjournal        = "Parallel Computing",
  journal-URL     = "http://www.sciencedirect.com/science/journal/01678191",
}
@TechReport{Cao:2013:CHP,
  author =       "Chongxiao Cao and Jack Dongarra and Peng Du and Mark
                 Gates and Piotr Luszczek and Stanimire Tomov",
  title =        "{clMAGMA}: High Performance Dense Linear Algebra with
                 {OpenCL}",
  type =         "LAPACK Working Note",
  number =       "275",
  institution =  inst-UTK-CS,
  address =      inst-UTK-CS:adr,
  month =        mar,
  year =         "2013",
  bibdate =      "Sun May 5 11:20:19 2013",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/lawn.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.netlib.org/lapack/lawnspdf/lawn275.pdf",
  acknowledgement = ack-nhfb,
  utknumber =    "UT-CS-13-706",
}
@Article{Chang:2013:PDS,
  author =       "Yao-Lin Chang and I-Lun Tseng",
  title =        "A parallel dual-scanline algorithm for partitioning
                 parameterized 45-degree polygons",
  journal =      j-TODAES,
  volume =       "18",
  number =       "4",
  pages =        "59:1--59:??",
  month =        oct,
  year =         "2013",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/2505015",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Fri Nov 8 11:45:54 MST 2013",
  bibsource =    "http://www.acm.org/pubs/contents/journals/todaes/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  abstract =     "In order to use rectangular corner stitching data
                 structures in storing parameterized orthogonal layouts,
                 parameterized polygons in the layouts must be
                 partitioned into rectangles. Likewise, in order to use
                 trapezoidal corner stitching data structures in storing
                 parameterized 45-degree layouts, parameterized polygons
                 in the layouts have to be partitioned into trapezoids.
                 In this article, a parallel polygon partitioning
                 algorithm is proposed; the algorithm is capable of
                 partitioning parameterized orthogonal polygons into
                 parameterized rectangles as well as partitioning
                 parameterized 45-degree polygons into parameterized
                 trapezoids. Additionally, the algorithm can be used to
                 partition fixed-coordinate polygons. By adopting the
                 dual-scanline technique, which involves using two
                 scanlines to concurrently sweep an input polygon, the
                 parallel partitioning algorithm can process vertices
                 and edges of the input polygon efficiently. The
                 parallel polygon partitioning algorithm has been
                 implemented in C++ with the use of OpenMP. Compared
                 with a sequential partitioning program which uses a
                 single scanline, our parallel partitioning program can
                 achieve 20\% to 30\% speedup while partitioning large
                 parameterized polygons or partitioning parameterized
                 polygons with complex constraints.",
  acknowledgement = ack-nhfb,
  articleno =    "59",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J776",
}
@Article{Chen:2013:IRM,
  author =       "Zhezhe Chen and Qi Gao and Wenbin Zhang and Feng Qin",
  title =        "Improving the Reliability of {MPI} Libraries via
                 Message Flow Checking",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "24",
  number =       "3",
  pages =        "535--549",
  month =        mar,
  year =         "2013",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2012.127",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed May 1 08:02:21 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Czapinski:2013:EPM,
  author =       "Michal Czapi{\'n}ski",
  title =        "An effective {Parallel Multistart Tabu Search for
                 Quadratic Assignment Problem} on {CUDA} platform",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "73",
  number =       "11",
  pages =        "1461--1468",
  month =        nov,
  year =         "2013",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Mon Sep 23 11:46:28 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  URL =          "http://www.sciencedirect.com/science/article/pii/S074373151200175X",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Dang:2013:CES,
  author =       "Hoang-Vu Dang and Bertil Schmidt",
  title =        "{CUDA}-enabled Sparse Matrix-Vector Multiplication on
                 {GPUs} using atomic operations",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "39",
  number =       "11",
  pages =        "737--750",
  month =        nov,
  year =         "2013",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Fri Nov 29 10:01:37 MST 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819113001178",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Demidov:2013:PCO,
  author =       "Denis Demidov and Karsten Ahnert and Karl Rupp and
                 Peter Gottschling",
  title =        "Programming {CUDA} and {OpenCL}: a Case Study Using
                 Modern {C++} Libraries",
  journal =      j-SIAM-J-SCI-COMP,
  volume =       "35",
  number =       "5",
  pages =        "C453--C472",
  month =        "????",
  year =         "2013",
  CODEN =        "SJOCE3",
  DOI =          "https://doi.org/10.1137/120903683",
  ISSN =         "1064-8275 (print), 1095-7197 (electronic)",
  ISSN-L =       "1064-8275",
  bibdate =      "Fri Mar 7 10:32:43 MST 2014",
  bibsource =    "http://epubs.siam.org/toc/sjoce3/35/5;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/siamjscicomput.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "SIAM Journal on Scientific Computing",
  journal-URL =  "http://epubs.siam.org/sisc",
  onlinedate =   "January 2013",
}
@Article{Deo:2013:PSA,
  author =       "Mrinal Deo and Sean Keely",
  title =        "Parallel suffix array and least common prefix for the
                 {GPU}",
  journal =      j-SIGPLAN,
  volume =       "48",
  number =       "8",
  pages =        "197--206",
  month =        aug,
  year =         "2013",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2517327.2442536",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Aug 26 13:48:51 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/string-matching.bib",
  note =         "PPoPP '13 Conference proceedings.",
  abstract =     "Suffix Array (SA) is a data structure formed by
                 sorting the suffixes of a string into lexicographic
                 order. SAs have been used in a variety of applications,
                 most notably in pattern matching and Burrows--Wheeler
                 Transform (BWT) based lossless data compression. SAs
                 have also become the data structure of choice for many,
                 if not all, string processing problems to which suffix
                 tree methodology is applicable. Over the last two
                 decades researchers have proposed many suffix array
                 construction algorithm (SACAs). We do a systematic
                 study of the main classes of SACAs with the intent of
                 mapping them onto a data parallel architecture like the
                 GPU. We conclude that skew algorithm [12], a linear
                 time recursive algorithm, is the best candidate for
                 GPUs as all its phases can be efficiently mapped to a
                 data parallel hardware. Our OpenCL implementation of
                 skew algorithm achieves a throughput of up to 25
                 MStrings/sec and a speedup of up to 34x and 5.8x over a
                 single threaded CPU implementation using a discrete GPU
                 and APU respectively. We also compare our OpenCL
                 implementation against the fastest known CPU
                 implementation based on induced copying and achieve a
                 speedup of up to 3.7x. Using SA we construct BWT on GPU
                 and achieve a speedup of 11x over the fastest known BWT
                 on GPU. Suffix arrays are often augmented with the
                 longest common prefix (LCP) information. We design a
                 novel high-performance parallel algorithm for computing
                 LCP on the GPU. Our GPU implementation of LCP achieves
                 a speedup of up to 25x and 4.3x on discrete GPU and APU
                 respectively.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Ellingson:2013:SNU,
  author =       "Sally R. Ellingson and Jeremy C. Smith and Jerome
                 Baudry",
  title =        "Software News and Updates: {VinaMPI}: {Facilitating}
                 multiple receptor high-throughput virtual docking on
                 high-performance computers",
  journal =      j-J-COMPUT-CHEM,
  volume =       "34",
  number =       "25",
  pages =        "2212--2221",
  day =          "30",
  month =        sep,
  year =         "2013",
  CODEN =        "JCCHDD",
  DOI =          "https://doi.org/10.1002/jcc.23367",
  ISSN =         "0192-8651 (print), 1096-987X (electronic)",
  ISSN-L =       "0192-8651",
  bibdate =      "Wed Nov 13 14:32:36 MST 2013",
  bibsource =    "http://www.interscience.wiley.com/jpages/0192-8651;
                 http://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www3.interscience.wiley.com/journalfinder.html",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Chemistry",
  journal-URL =  "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X",
  onlinedate =   "29 Jun 2013",
}
@Article{Friedley:2013:OPE,
  author =       "Andrew Friedley and Torsten Hoefler and Greg
                 Bronevetsky and Andrew Lumsdaine and Ching-Chen Ma",
  title =        "Ownership passing: efficient distributed memory
                 programming on multi-core systems",
  journal =      j-SIGPLAN,
  volume =       "48",
  number =       "8",
  pages =        "177--186",
  month =        aug,
  year =         "2013",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2517327.2442534",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Aug 26 13:48:51 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPoPP '13 Conference proceedings.",
  abstract =     "The number of cores in multi- and many-core
                 high-performance processors is steadily increasing.
                 MPI, the de-facto standard for programming
                 high-performance computing systems offers a distributed
                 memory programming model. MPI's semantics force a copy
                 from one process' send buffer to another process'
                 receive buffer. This makes it difficult to achieve the
                 same performance on modern hardware than shared memory
                 programs which are arguably harder to maintain and
                 debug. We propose generalizing MPI's communication
                 model to include ownership passing, which make it
                 possible to fully leverage the shared memory hardware
                 of multi- and many-core CPUs to stream communicated
                 data concurrently with the receiver's computations on
                 it. The benefits and simplicity of message passing are
                 retained by extending MPI with calls to send (pass)
                 ownership of memory regions, instead of their contents,
                 between processes. Ownership passing is achieved with a
                 hybrid MPI implementation that runs MPI processes as
                 threads and is mostly transparent to the user. We
                 propose an API and a static analysis technique to
                 transform legacy MPI codes automatically and
                 transparently to the programmer, demonstrating that
                 this scheme is easy to use in practice. Using the
                 ownership passing technique, we see up to 51\%
                 communication speedups over a standard message passing
                 implementation on state-of-the art multicore systems.
                 Our analysis and interface will lay the groundwork for
                 future development of MPI-aware optimizing compilers
                 and multi-core specific optimizations, which will be
                 key for success in current and next-generation
                 computing platforms.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Gao:2013:GGA,
  author =       "Mingcen Gao and Thanh-Tung Cao and Ashwin Nanjappa and
                 Tiow-Seng Tan and Zhiyong Huang",
  title =        "{gHull}: a {GPU} algorithm for {$3$D} convex hull",
  journal =      j-TOMS,
  volume =       "40",
  number =       "1",
  pages =        "3:1--3:19",
  month =        sep,
  year =         "2013",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/2513109.2513112",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Mon Sep 30 16:05:58 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  abstract =     "A novel algorithm is presented to compute the convex
                 hull of a point set in R$^3$ using the graphics
                 processing unit (GPU). By exploiting the relationship
                 between the Voronoi diagram and the convex hull, the
                 algorithm derives the approximation of the convex hull
                 from the former. The other extreme vertices of the
                 convex hull are then found by using a two-round
                 checking in the digital and the continuous space
                 successively. The algorithm does not need explicit
                 locking or any other concurrency control mechanism,
                 thus it can maximize the parallelism available on the
                 modern GPU. The implementation using the CUDA
                 programming model on NVIDIA GPUs is exact and
                 efficient. The experiments show that it is up to an
                 order of magnitude faster than other sequential convex
                 hull implementations running on the CPU for inputs of
                 millions of points. The works demonstrate that the GPU
                 can be used to solve nontrivial computational geometry
                 problems with significant performance benefit.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Gardner:2013:CCE,
  author =       "Mark Gardner and Paul Sathre and Wu-chun Feng and
                 Gabriel Martinez",
  title =        "Characterizing the challenges and evaluating the
                 efficacy of a {CUDA-to-OpenCL} translator",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "39",
  number =       "12",
  pages =        "769--786",
  month =        dec,
  year =         "2013",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Tue Dec 3 18:06:48 MST 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819113001075",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Goglin:2013:KGS,
  author =       "Brice Goglin and St{\'e}phanie Moreaud",
  title =        "{KNEM}: a generic and scalable kernel-assisted
                 intra-node {MPI} communication framework",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "73",
  number =       "2",
  pages =        "176--188",
  month =        feb,
  year =         "2013",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2012.09.016",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Thu Dec 13 20:22:17 MST 2012",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731512002316",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Grasso:2013:APS,
  author =       "Ivan Grasso and Klaus Kofler and Biagio Cosenza and
                 Thomas Fahringer",
  title =        "Automatic problem size sensitive task partitioning on
                 heterogeneous parallel systems",
  journal =      j-SIGPLAN,
  volume =       "48",
  number =       "8",
  pages =        "281--282",
  month =        aug,
  year =         "2013",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2517327.2442545",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Aug 26 13:48:51 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  note =         "PPoPP '13 Conference proceedings.",
  abstract =     "In this paper we propose a novel approach which
                 automatizes task partitioning in heterogeneous systems.
                 Our framework is based on the Insieme Compiler and
                 Runtime infrastructure. The compiler translates a
                 single-device OpenCL program into a multi-device OpenCL
                 program. The runtime system then performs dynamic task
                 partitioning based on an offline-generated prediction
                 model. In order to derive the prediction model, we use
                 a machine learning approach that incorporates static
                 program features as well as dynamic, input sensitive
                 features. Our approach has been evaluated over a suite
                 of 23 programs and achieves performance improvements
                 compared to an execution of the benchmarks on a single
                 CPU and a single GPU only.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Gu:2013:PCI,
  author =       "Zheng Gu and Matthew Small and Xin Yuan and Aniruddha
                 Marathe and David K. Lowenthal",
  title =        "Protocol Customization for Improving {MPI} Performance
                 on {RDMA}-Enabled Clusters",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "41",
  number =       "5",
  pages =        "682--703",
  month =        oct,
  year =         "2013",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-013-0242-0",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Sat Jun 22 12:29:26 MDT 2013",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=41&issue=5;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s10766-013-0242-0",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Hadi:2013:CFA,
  author =       "Mohammed F. Hadi and Seyed A. Esmaeili",
  title =        "{CUDA Fortran} acceleration for the finite-difference
                 time-domain method",
  journal =      j-COMP-PHYS-COMM,
  volume =       "184",
  number =       "5",
  pages =        "1395--1400",
  month =        may,
  year =         "2013",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Wed Mar 27 05:55:10 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465513000118",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Heimel:2013:HOP,
  author =       "Max Heimel and Michael Saecker and Holger Pirk and
                 Stefan Manegold and Volker Markl",
  title =        "Hardware-oblivious parallelism for in-memory
                 column-stores",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "6",
  number =       "9",
  pages =        "709--720",
  month =        jul,
  year =         "2013",
  CODEN =        "????",
  ISSN =         "2150-8097",
  bibdate =      "Fri Dec 13 05:56:46 MST 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "The multi-core architectures of today's computer
                 systems make parallelism a necessity for performance
                 critical applications. Writing such applications in a
                 generic, hardware-oblivious manner is a challenging
                 problem: Current database systems thus rely on
                 labor-intensive and error-prone manual tuning to
                 exploit the full potential of modern parallel hardware
                 architectures like multi-core CPUs and graphics cards.
                 We propose an alternative design for a parallel
                 database engine, based on a single set of
                 hardware-oblivious operators, which are compiled down
                 to the actual hardware at runtime. This design reduces
                 the development overhead for parallel database engines,
                 while achieving competitive performance to hand-tuned
                 systems. We provide a proof-of-concept for this design
                 by integrating operators written using the parallel
                 programming framework OpenCL into the open-source
                 database MonetDB. Following this approach, we achieve
                 efficient, yet highly portable parallel code without
                 the need for optimization by hand. We evaluated our
                 implementation against MonetDB using TPC-H derived
                 queries and observed a performance that rivals that of
                 MonetDB's query execution on the CPU and surpasses it
                 on the GPU. In addition, we show that the same set of
                 operators runs nearly unchanged on a GPU, demonstrating
                 the feasibility of our approach.",
  acknowledgement = ack-nhfb,
  fjournal =     "Proceedings of the VLDB Endowment",
}
@Article{Hilbrich:2013:MRE,
  author =       "Tobias Hilbrich and Joachim Protze and Martin Schulz
                 and Bronis R. de Supinski and Matthias S. M{\"u}ller",
  title =        "{MPI} runtime error detection with {MUST}: {Advances}
                 in deadlock detection",
  journal =      j-SCI-PROG,
  volume =       "21",
  number =       "3--4",
  pages =        "109--121",
  month =        "????",
  year =         "2013",
  CODEN =        "SCIPEV",
  DOI =          "https://doi.org/10.3233/SPR-130368",
  ISSN =         "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L =       "1058-9244",
  bibdate =      "Sat Mar 8 14:11:02 MST 2014",
  bibsource =    "http://www.iospress.nl/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Scientific Programming",
  journal-URL =  "http://iospress.metapress.com/content/1058-9244",
}
@Article{Hoefler:2013:MMN,
  author =       "Torsten Hoefler and James Dinan and Darius Buntinas
                 and Pavan Balaji and Brian Barrett and others",
  title =        "{MPI $+$ MPI}: a new hybrid approach to parallel
                 programming with {MPI} plus shared memory",
  journal =      j-COMPUTING,
  volume =       "95",
  number =       "12",
  pages =        "1121--1136",
  month =        dec,
  year =         "2013",
  CODEN =        "CMPTA2",
  DOI =          "https://doi.org/10.1007/s00607-013-0324-2",
  ISSN =         "0010-485X (print), 1436-5057 (electronic)",
  ISSN-L =       "0010-485X",
  bibdate =      "Wed Jan 29 10:10:11 MST 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0010-485X&volume=95&issue=12;
                 http://www.math.utah.edu/pub/tex/bib/computing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s00607-013-0324-2",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing",
  journal-URL =  "http://link.springer.com/journal/607",
}
@Article{Hogg:2013:FDT,
  author =       "J. D. Hogg",
  title =        "A Fast Dense Triangular Solve in {CUDA}",
  journal =      j-SIAM-J-SCI-COMP,
  volume =       "35",
  number =       "3",
  pages =        "C303--C322",
  month =        "????",
  year =         "2013",
  CODEN =        "SJOCE3",
  DOI =          "https://doi.org/10.1137/12088358X",
  ISSN =         "1064-8275 (print), 1095-7197 (electronic)",
  ISSN-L =       "1064-8275",
  bibdate =      "Fri Jul 19 07:43:53 MDT 2013",
  bibsource =    "http://epubs.siam.org/sam-bin/dbq/toc/SISC/35/3;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/siamjscicomput.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "SIAM Journal on Scientific Computing",
  journal-URL =  "http://epubs.siam.org/sisc",
  onlinedate =   "January 2013",
}
@Article{Huang:2013:ACM,
  author =       "Libo Huang and Zhiying Wang and Nong Xiao and Yongwen
                 Wang and Qiang Dou",
  title =        "Adaptive communication mechanism for accelerating
                 {MPI} functions in {NoC}-based multicore processors",
  journal =      j-TACO,
  volume =       "10",
  number =       "3",
  pages =        "18:1--18:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2512434",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Mon Sep 16 17:20:12 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Multicore designs have emerged as the dominant
                 organization for future high-performance
                 microprocessors. Communication in such designs is often
                 enabled by Networks-on-Chip (NoCs). A new trend in such
                 architectures is to fit a Message Passing Interface
                 (MPI) programming model on NoCs to achieve optimal
                 parallel application performance. A key issue in
                 designing MPI over NoCs is communication protocol,
                 which has not been explored in previous research. This
                 article advocates a hardware-supported communication
                 mechanism using a protocol-adaptive approach to adjust
                 to varying NoC configurations (e.g., number of buffers)
                 and workload behavior (e.g., number of messages). We
                 propose the ADaptive Communication Mechanism (ADCM), a
                 hybrid protocol that involves behavior similar to
                 buffered communication when sufficient buffer is
                 available in the receiver to that similar to a
                 synchronous protocol when buffers in the receiver are
                 limited. ADCM adapts dynamically by deciding
                 communication protocol on a per-request basis using a
                 local estimate of recent buffer utilization. ADCM
                 attempts to combine both the advantages of buffered and
                 synchronous communication modes to achieve enhanced
                 throughput and performance. Simulations of various
                 workloads show that the proposed communication
                 mechanism can be effectively used in future NoC
                 designs.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Jimenez:2013:BCA,
  author =       "Jes{\'u}s Jim{\'e}nez and Juan {Ruiz de Miras}",
  title =        "Box-counting algorithm on {GPU} and multi-core {CPU}:
                 an {OpenCL} cross-platform study",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "65",
  number =       "3",
  pages =        "1327--1352",
  month =        sep,
  year =         "2013",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-013-0885-z",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Feb 8 11:06:43 MST 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=65&issue=3;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s11227-013-0885-z",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Jin:2013:PCU,
  author =       "Hui Jin and Xian-He Sun",
  title =        "Performance comparison under failures of {MPI} and
                 {MapReduce}: an analytical approach",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "29",
  number =       "7",
  pages =        "1808--1815",
  month =        sep,
  year =         "2013",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Mon Aug 26 16:08:23 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/0167739X",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167739X13000290",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Jog:2013:OCT,
  author =       "Adwait Jog and Onur Kayiran and Nachiappan Chidambaram
                 Nachiappan and Asit K. Mishra and Mahmut T. Kandemir
                 and Onur Mutlu and Ravishankar Iyer and Chita R. Das",
  title =        "{OWL}: cooperative thread array aware scheduling
                 techniques for improving {GPGPU} performance",
  journal =      j-SIGPLAN,
  volume =       "48",
  number =       "4",
  pages =        "395--406",
  month =        apr,
  year =         "2013",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2499368.2451158",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Jul 1 17:15:23 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Emerging GPGPU architectures, along with programming
                 models like CUDA and OpenCL, offer a cost-effective
                 platform for many applications by providing high thread
                 level parallelism at lower energy budgets.
                 Unfortunately, for many general-purpose applications,
                 available hardware resources of a GPGPU are not
                 efficiently utilized, leading to lost opportunity in
                 improving performance. A major cause of this is the
                 inefficiency of current warp scheduling policies in
                 tolerating long memory latencies. In this paper, we
                 identify that the scheduling decisions made by such
                 policies are agnostic to thread-block, or cooperative
                 thread array (CTA), behavior, and as a result
                 inefficient. We present a coordinated CTA-aware
                 scheduling policy that utilizes four schemes to
                 minimize the impact of long memory latencies. The first
                 two schemes, CTA-aware two-level warp scheduling and
                 locality aware warp scheduling, enhance per-core
                 performance by effectively reducing cache contention
                 and improving latency hiding capability. The third
                 scheme, bank-level parallelism aware warp scheduling,
                 improves overall GPGPU performance by enhancing DRAM
                 bank-level parallelism. The fourth scheme employs
                 opportunistic memory-side prefetching to further
                 enhance performance by taking advantage of open DRAM
                 rows. Evaluations on a 28-core GPGPU platform with
                 highly memory-intensive applications indicate that our
                 proposed mechanism can provide 33\% average performance
                 improvement compared to the commonly-employed
                 round-robin warp scheduling policy.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ASPLOS '13 conference proceedings.",
}
@Article{Kegel:2013:DTU,
  author =       "Philipp Kegel and Michel Steuwer and Sergei Gorlatch",
  title =        "{dOpenCL}: Towards uniform programming of distributed
                 heterogeneous multi-\slash many-core systems",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "73",
  number =       "12",
  pages =        "1639--1648",
  month =        dec,
  year =         "2013",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Nov 29 09:55:28 MST 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.sciencedirect.com/science/journal/07437315",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731513001597",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Khanna:2013:HPN,
  author =       "Gaurav Khanna",
  title =        "High-Precision Numerical Simulations on a {CUDA GPU}:
                 {Kerr} Black Hole Tails",
  journal =      j-J-SCI-COMPUT,
  volume =       "56",
  number =       "2",
  pages =        "366--380",
  month =        aug,
  year =         "2013",
  CODEN =        "JSCOEB",
  DOI =          "https://doi.org/10.1007/s10915-012-9679-3",
  ISSN =         "0885-7474 (print), 1573-7691 (electronic)",
  ISSN-L =       "0885-7474",
  bibdate =      "Sat Mar 8 11:16:21 MST 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7474&volume=56&issue=2;
                 http://www.math.utah.edu/pub/tex/bib/jscicomput.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s10915-012-9679-3;
                 http://link.springer.com/content/pdf/10.1007/s10915-012-9679-3.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Scientific Computing",
  journal-URL =  "http://link.springer.com/journal/10915",
}
@Article{Kim:2013:MPE,
  author =       "Yooseong Kim and Aviral Shrivastava",
  title =        "Memory performance estimation of {CUDA} programs",
  journal =      j-TECS,
  volume =       "13",
  number =       "2",
  pages =        "21:1--21:??",
  month =        sep,
  year =         "2013",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2514641.2514648",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Fri Sep 27 18:13:13 MDT 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "CUDA has successfully popularized GPU computing, and
                 GPGPU applications are now used in various embedded
                 systems. The CUDA programming model provides a simple
                 interface to program on GPUs, but tuning GPGPU
                 applications for high performance is still quite
                 challenging. Programmers need to consider numerous
                 architectural details, and small changes in source
                 code, especially on the memory access pattern, can
                 affect performance significantly. This makes it very
                 difficult to optimize CUDA programs. This article
                 presents CuMAPz, which is a tool to analyze and compare
                 the memory performance of CUDA programs. CuMAPz can
                 help programmers explore different ways of using shared
                 and global memories, and optimize their program for
                 efficient memory behavior. CuMAPz models several
                 memory-performance-related factors: data reuse, global
                 memory access coalescing, global memory latency hiding,
                 shared memory bank conflict, channel skew, and branch
                 divergence. Experimental results show that CuMAPz can
                 accurately estimate performance with correlation
                 coefficient of 0.96. By using CuMAPz to explore the
                 memory access design space, we could improve the
                 performance of our benchmarks by 30\% more than the
                 previous approach [Hong and Kim 2010].",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J840",
}
@Article{Krotkiewski:2013:ESC,
author = "Marcin Krotkiewski and Marcin Dabrowski",
title = "Efficient {$3$D} stencil computations using {CUDA}",
journal = j-PARALLEL-COMPUTING,
volume = "39",
number = "10",
pages = "533--548",
month = oct,
year = "2013",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Mon Sep 30 16:37:36 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S016781911300094X",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Kruzel:2013:VOI,
author = "Filip Kruzel and Krzysztof Bana{\'s}",
title = "Vectorized {OpenCL} implementation of numerical
integration for higher order finite elements",
journal = j-COMPUT-MATH-APPL,
volume = "66",
number = "10",
pages = "2030--2044",
month = dec,
year = "2013",
CODEN = "CMAPDK",
ISSN = "0898-1221 (print), 1873-7668 (electronic)",
ISSN-L = "0898-1221",
bibdate = "Wed Mar 1 21:51:22 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computmathappl2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S089812211300521X",
acknowledgement = ack-nhfb,
fjournal = "Computers and Mathematics with Applications",
journal-URL = "http://www.sciencedirect.com/science/journal/08981221",
}
@Article{Kuckuk:2013:IPD,
author = "Sebastian Kuckuk and Tobias Preclik and Harald
K{\"o}stler",
title = "Interactive particle dynamics using {OpenCL} and
{Kinect}",
journal = j-INT-J-PAR-EMER-DIST-SYS,
volume = "28",
number = "6",
pages = "519--536",
year = "2013",
DOI = "https://doi.org/10.1080/17445760.2012.745671",
bibdate = "Thu Mar 6 05:45:37 MST 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/intjparemerdistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel, Emergent and
Distributed Systems: IJPEDS",
journal-URL = "http://www.tandfonline.com/loi/gpaa20",
}
@Article{Kumar:2013:GAI,
author = "Piyush Kumar and Anupam Agrawal",
title = "{GPU}-Accelerated Interactive Visualization of {$ 3 D $}
Volumetric Data Using {CUDA}",
journal = j-INT-J-IMAGE-GRAPHICS,
volume = "13",
number = "2",
pages = "??--??",
month = apr,
year = "2013",
CODEN = "????",
ISSN = "0219-4678",
ISSN-L = "0219-4678",
bibdate = "Tue Aug 6 10:37:51 MDT 2013",
bibsource = "http://ejournals.wspc.com.sg/ijig/ijig.shtml;
http://www.math.utah.edu/pub/tex/bib/ijig.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://doi.org/10.1142/S0219467813400032",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Image and Graphics (IJIG)",
journal-URL = "http://www.worldscientific.com/worldscinet/ijig",
}
@Article{Kunaseth:2013:ASD,
author = "Manaschai Kunaseth and David F. Richards and James N.
Glosli",
title = "Analysis of scalable data-privatization threading
algorithms for hybrid {MPI\slash OpenMP}
parallelization of molecular dynamics",
journal = j-J-SUPERCOMPUTING,
volume = "66",
number = "1",
pages = "406--430",
month = oct,
year = "2013",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-013-0915-x",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Sat Feb 8 11:13:32 MST 2014",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=66&issue=1;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s11227-013-0915-x",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Li:2013:COM,
author = "Hung-Fu Li and Tyng-Yeu Liang and Jun-Yao Chiu",
title = "A compound {OpenMP\slash MPI} program development
toolkit for hybrid {CPU\slash GPU} clusters",
journal = j-J-SUPERCOMPUTING,
volume = "66",
number = "1",
pages = "381--405",
month = oct,
year = "2013",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-013-0912-0",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Sat Feb 8 11:13:32 MST 2014",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=66&issue=1;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s11227-013-0912-0",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Liu:2013:DLO,
author = "Jun Liu and Wei Ding and Ohyoung Jang and Mahmut
Kandemir",
title = "Data layout optimization for {GPGPU} architectures",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "283--284",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442546",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "GPUs are being widely used in accelerating
general-purpose applications, leading to the emergence
of GPGPU architectures. New programming models, e.g.,
Compute Unified Device Architecture (CUDA), have been
proposed to facilitate programming general-purpose
computations in GPGPUs. However, writing
high-performance CUDA codes manually is still tedious
and difficult. In particular, the organization of the
data in the memory space can greatly affect the
performance due to the unique features of a custom
GPGPU memory hierarchy. In this work, we propose an
automatic data layout transformation framework to solve
the key issues associated with a GPGPU memory hierarchy
(i.e., channel skewing, data coalescing, and bank
conflicts). Our approach employs a widely applicable
strategy based on a novel concept called data
localization. Specifically, we try to optimize the
layout of the arrays accessed in affine loop nests, for
both the device memory and shared memory, at both
coarse grain and fine grain parallelization levels. We
performed an experimental evaluation of our data layout
optimization strategy using 15 benchmarks on an NVIDIA
CUDA GPU device. The results show that the proposed
data transformation approach brings around 4.3X speedup
on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Lu:2013:MLP,
author = "Ligang Lu and Karen Magerlein",
title = "Multi-level parallel computing of reverse time
migration for seismic imaging on {Blue Gene/Q}",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "291--292",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442550",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 Conference proceedings.",
abstract = "Blue Gene/Q (BG/Q) is an early representative of
increasing scale and thread count that will
characterize future HPC systems: large counts of nodes,
cores, and threads; and a rich programming environment
with many degrees of freedom in parallel computing
optimization. So it is both a challenge and an
opportunity to it to accelerate the seismic imaging
applications to the unprecedented levels that will
significantly advance the technologies for the oil and
gas industry. In this work we aim to address two
important questions: how HPC systems with high levels
of scale and thread count will perform in real
applications; and how systems with many degrees of
freedom in parallel programming can be calibrated to
achieve optimal performance. Based on BG/Q's
architecture features and RTM workload characteristics,
we developed massive domain partition, MPI, and SIMD
Our detailed deep analyses in various aspects of
optimization also provide valuable experience and
insights into how can be utilized to facilitate the
advance of seismic imaging technologies. Our BG/Q RTM
solution achieved a 14.93x speedup over the BG/P
implementation. Our multi-level parallelism strategies
for Reverse Time Migration (RTM) seismic imaging
computing on BG/Q provides an example of how HPC
systems like BG/Q can accelerate applications to a new
level.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Lu:2013:WGA,
author = "Xiangwen Lu and Jiabin Yuan and Weiwei Zhang",
title = "Workflow of the {Grover} algorithm simulation
incorporating {CUDA} and {GPGPU}",
journal = j-COMP-PHYS-COMM,
volume = "184",
number = "9",
pages = "2035--2041",
month = sep,
year = "2013",
CODEN = "CPHCBZ",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Mon Aug 26 14:34:22 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465513001148",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Ma:2013:KAT,
author = "Teng Ma and George Bosilca and Aurelien Bouteiller and
Jack J. Dongarra",
title = "Kernel-assisted and topology-aware {MPI} collective
communications on multicore\slash many-core platforms",
journal = j-J-PAR-DIST-COMP,
volume = "73",
number = "7",
pages = "1000--1010",
month = jul,
year = "2013",
CODEN = "JPDCER",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Mon Aug 26 16:44:35 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/07437315",
URL = "http://www.sciencedirect.com/science/article/pii/S0743731513000166",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Mohamed:2013:MMM,
author = "Hisham Mohamed and St{\'e}phane Marchand-Maillet",
title = "{MRO-MPI}: {MapReduce} overlapping using {MPI} and an
optimized data exchange policy",
journal = j-PARALLEL-COMPUTING,
volume = "39",
number = "12",
pages = "851--866",
month = dec,
year = "2013",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Tue Dec 3 18:06:48 MST 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819113001026",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Nandivada:2013:TFO,
author = "V. Krishna Nandivada and Jun Shirako and Jisheng Zhao
and Vivek Sarkar",
title = "A Transformation Framework for Optimizing
Task-Parallel Programs",
journal = j-TOPLAS,
volume = "35",
number = "1",
pages = "3:1--3:??",
month = apr,
year = "2013",
CODEN = "ATPSDT",
DOI = "https://doi.org/10.1145/2450136.2450138",
ISSN = "0164-0925 (print), 1558-4593 (electronic)",
ISSN-L = "0164-0925",
bibdate = "Tue Apr 30 18:56:06 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/toplas/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/toplas.bib",
abstract = "Task parallelism has increasingly become a trend with
programming models such as OpenMP 3.0, Cilk, Java
Concurrency, X10, Chapel and Habanero-Java (HJ) to
address the requirements of multicore programmers.
While task parallelism increases productivity by
allowing the programmer to express multiple levels of
parallelism, it can also lead to performance
degradation due to increased overheads. In this
article, we introduce a transformation framework for
optimizing task-parallel programs with a focus on task
creation and task termination operations. These
operations can appear explicitly in constructs such as
async, finish in X10 and HJ, task, taskwait in OpenMP
3.0, and spawn, sync in Cilk, or implicitly in
composite code statements such as foreach and ateach
loops in X10, forall and foreach loops in HJ, and
parallel loop in OpenMP. Our framework includes a
definition of data dependence in task-parallel
programs, a happens-before analysis algorithm, and a
range of program transformations for optimizing task
parallelism. Broadly, our transformations cover three
different but interrelated optimizations: (1)
finish-elimination, (2) forall-coarsening, and (3)
loop-chunking. Finish-elimination removes redundant
task termination operations, forall-coarsening replaces
expensive task creation and termination operations with
more efficient synchronization operations, and
loop-chunking extracts useful parallelism from ideal
parallelism. All three optimizations are specified in
an iterative transformation framework that applies a
sequence of relevant transformations until a fixed
point is reached. Further, we discuss the impact of
exception semantics on the specified transformations,
and extend them to handle task-parallel programs with
precise exception semantics. Experimental results were
obtained for a collection of task-parallel benchmarks
on three multicore platforms: a dual-socket 128-thread
(16-core) Niagara T2 system, a quad-socket 16-core
Intel Xeon SMP, and a quad-socket 32-core Power7 SMP.
We have observed that the proposed optimizations
interact with each other in a synergistic way, and
result in an overall geometric average performance
improvement between 6.28$ \times $ and 10.30$ \times $,
measured across all three platforms for the benchmarks
studied.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Programming Languages and
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J783",
}
@Article{Pai:2013:IGC,
author = "Sreepathi Pai and Matthew J. Thazhuthaveetil and R.
Govindarajan",
title = "Improving {GPGPU} concurrency with elastic kernels",
journal = j-SIGPLAN,
volume = "48",
number = "4",
pages = "407--418",
month = apr,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499368.2451160",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:23 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Each new generation of GPUs vastly increases the
resources available to GPGPU programs. GPU programming
models (like CUDA) were designed to scale to use these
resources. However, we find that CUDA programs actually
do not scale to utilize all available resources, with
over 30\% of resources going unused on average for
programs of the Parboil2 suite that we used in our
work. Current GPUs therefore allow concurrent execution
of kernels to improve utilization. In this work, we
study concurrent execution of GPU kernels using
multiprogram workloads on current NVIDIA Fermi GPUs. On
two-program workloads from the Parboil2 benchmark suite
we find concurrent execution is often no better than
serialized execution. We identify that the lack of
control over resource allocation to kernels is a major
serialization bottleneck. We propose transformations
that convert CUDA kernels into elastic kernels which
permit fine-grained control over their resource usage.
We then propose several elastic-kernel aware
concurrency policies that offer significantly better
performance and concurrency compared to the current
CUDA policy. We evaluate our proposals on real hardware
using multiprogrammed workloads constructed from
benchmarks in the Parboil 2 suite. On average, our
proposals increase system throughput (STP) by 1.21x and
improve the average normalized turnaround time (ANTT)
by 3.73x for two-program workloads when compared to the
current CUDA concurrency implementation.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "ASPLOS '13 conference proceedings.",
}
@Article{Papakonstantinou:2013:ECC,
author = "Alexandros Papakonstantinou and Karthik Gururaj and
John A. Stratton and Deming Chen and Jason Cong and
Wen-Mei W. Hwu",
title = "Efficient compilation of {CUDA} kernels for
high-performance computing on {FPGAs}",
journal = j-TECS,
volume = "13",
number = "2",
pages = "25:1--25:??",
month = sep,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2514641.2514652",
ISSN = "1539-9087 (print), 1558-3465 (electronic)",
ISSN-L = "1539-9087",
bibdate = "Fri Sep 27 18:13:13 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/tecs.bib",
abstract = "The rise of multicore architectures across all
computing domains has opened the door to heterogeneous
multiprocessors, where processors of different compute
characteristics can be combined to effectively boost
the performance per watt of different application
kernels. GPUs, in particular, are becoming very popular
for speeding up compute-intensive kernels of
scientific, imaging, and simulation applications. New
programming models that facilitate parallel processing
on heterogeneous systems containing GPUs are spreading
rapidly in the computing community. By leveraging these
investments, the developers of other accelerators have
an opportunity to significantly reduce the programming
effort by supporting those accelerator models already
gaining popularity. In this work, we adapt one such
language, the CUDA programming model, into a new FPGA
design flow called FCUDA, which efficiently maps the
coarse- and fine-grained parallelism exposed in CUDA
onto the reconfigurable fabric. Our CUDA-to-FPGA flow
employs AutoPilot, an advanced high-level synthesis
tool (available from Xilinx) which enables
high-abstraction FPGA programming. FCUDA is based on a
source-to-source compilation that transforms the SIMT
(Single Instruction, Multiple Thread) CUDA code into
task-level parallel C code for AutoPilot. We describe
the details of our CUDA-to-FPGA flow and demonstrate
the highly competitive performance of the resulting
customized FPGA multicore accelerators. To the best of
our knowledge, this is the first CUDA-to-FPGA flow to
demonstrate the applicability and potential advantage
of using the CUDA programming model for
high-performance computing in FPGAs.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Transactions on Embedded Computing Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?&idx=J840",
}
@Article{Pennycook:2013:IPP,
author = "S. J. Pennycook and S. D. Hammond and S. A. Wright and
J. A. Herdman and I. Miller and S. A. Jarvis",
title = "An investigation of the performance portability of
{OpenCL}",
journal = j-J-PAR-DIST-COMP,
volume = "73",
number = "11",
pages = "1439--1450",
month = nov,
year = "2013",
CODEN = "JPDCER",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Mon Sep 23 11:46:28 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/07437315",
URL = "http://www.sciencedirect.com/science/article/pii/S0743731512001669",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Poulson:2013:ENF,
author = "Jack Poulson and Bryan Marker and Robert A. van de
Geijn and Jeff R. Hammond and Nichols A. Romero",
title = "{Elemental}: a New Framework for Distributed Memory
Dense Matrix Computations",
journal = j-TOMS,
volume = "39",
number = "2",
pages = "13:1--13:24",
month = feb,
year = "2013",
CODEN = "ACMSCU",
DOI = "https://doi.org/10.1145/2427023.2427030",
ISSN = "0098-3500 (print), 1557-7295 (electronic)",
ISSN-L = "0098-3500",
bibdate = "Wed Feb 20 16:46:13 MST 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/toms.bib",
abstract = "Parallelizing dense matrix computations to distributed
memory architectures is a well-studied subject and
generally considered to be among the best understood
domains of parallel computing. Two packages, developed
in the mid 1990s, still enjoy regular use: ScaLAPACK
and PLAPACK. With the advent of many-core
architectures, which may very well take the shape of
distributed memory architectures within a single
processor, these packages must be revisited since the
traditional MPI-based approaches will likely need to be
extended. Thus, this is a good time to review lessons
learned since the introduction of these two packages
and to propose a simple yet effective alternative.
Preliminary performance results show the new solution
achieves competitive, if not superior, performance on
large clusters.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Mathematical Software (TOMS)",
journal-URL = "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Ragan-Kelley:2013:HLC,
author = "Jonathan Ragan-Kelley and Connelly Barnes and Andrew
Adams and Sylvain Paris and Fr{\'e}do Durand and Saman
Amarasinghe",
title = "{Halide}: a language and compiler for optimizing
parallelism, locality, and recomputation in image
processing pipelines",
journal = j-SIGPLAN,
volume = "48",
number = "6",
pages = "519--530",
month = jun,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499370.2462176",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:38 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Image processing pipelines combine the challenges of
stencil computations and stream programs. They are
composed of large graphs of different stencil stages,
as well as complex reductions, and stages with global
or data-dependent access patterns. Because of their
complex structure, the performance difference between a
naive implementation of a pipeline and an optimized one
is often an order of magnitude. Efficient
implementations require optimization of both
parallelism and locality, but due to the nature of
stencils, there is a fundamental tension between
parallelism, locality, and introducing redundant
recomputation of shared values. We present a systematic
model of the tradeoff space fundamental to stencil
pipelines, a schedule representation which describes
concrete points in this space for each stage in an
image processing pipeline, and an optimizing compiler
for the Halide image processing language that
synthesizes high performance implementations from a
Halide algorithm and a schedule. Combining this
compiler with stochastic search over the space of
schedules enables terse, composable programs to achieve
state-of-the-art performance on a wide range of real
image processing pipelines, and across different
hardware architectures, including multicores with SIMD,
and heterogeneous CPU+GPU execution. From simple Halide
programs written in a few hours, we demonstrate
performance up to 5x faster than hand-tuned C,
intrinsics, and CUDA implementations optimized by
experts over weeks or months, for image processing
applications beyond the reach of past automatic
compilers.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PLDI '13 conference proceedings.",
}
@Article{Reyes:2013:PEO,
author = "Ruym{\'a}n Reyes and Iv{\'a}n L{\'o}pez and Juan J.
Fumero and Francisco de Sande",
title = "A preliminary evaluation of {OpenACC}
implementations",
journal = j-J-SUPERCOMPUTING,
volume = "65",
number = "3",
pages = "1063--1075",
month = sep,
year = "2013",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-012-0853-z",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Sat Feb 8 10:21:44 MST 2014",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=65&issue=3;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s11227-012-0853-z",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Rodrigues:2013:MAA,
author = "A. Wendell O. Rodrigues and Fr{\'e}d{\'e}ric
Guyomarc'h and Jean-Luc Dekeyser",
title = "An {MDE} Approach for Automatic Code Generation from
{UML\slash MARTE} to {OpenCL}",
journal = j-COMPUT-SCI-ENG,
volume = "15",
number = "1",
pages = "46--55",
month = jan # "\slash " # feb,
year = "2013",
CODEN = "CSENFA",
DOI = "https://doi.org/10.1109/MCSE.2012.35",
ISSN = "1521-9615",
ISSN-L = "1521-9615",
bibdate = "Fri Jun 21 08:34:49 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Computing in Science and Engineering",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}
@Article{Rodrigues:2013:POM,
author = "Eduardo R. Rodrigues and Philippe O. A. Navaux and
Jairo Panetta and Celso L. Mendes",
title = "Preserving the original {MPI} semantics in a
virtualized processor environment",
journal = j-SCI-COMPUT-PROGRAM,
volume = "78",
number = "4",
pages = "412--421",
day = "1",
month = apr,
year = "2013",
CODEN = "SCPGD4",
DOI = "https://doi.org/10.1016/j.scico.2012.07.005",
ISSN = "0167-6423 (print), 1872-7964 (electronic)",
ISSN-L = "0167-6423",
bibdate = "Mon Feb 4 10:59:59 MST 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/scicomputprogram.bib;
http://www.sciencedirect.com/science/journal/01676423",
URL = "http://www.sciencedirect.com/science/article/pii/S0167642312001335",
acknowledgement = ack-nhfb,
fjournal = "Science of Computer Programming",
journal-URL = "http://www.sciencedirect.com/science/journal/01676423",
remark = "Special section on Mutation Testing and Analysis
(Mutation 2010) \& Special section on the Programming
Languages track at the 25th ACM Symposium on Applied
Computing.",
}
@Article{Rosen:2013:PVA,
author = "Paul Rosen",
title = "Performance: A Visual Approach to Investigating Shared
and Global Memory Behavior of {CUDA} Kernels",
journal = j-CGF,
volume = "32",
number = "3pt2",
pages = "161--170",
month = jun,
year = "2013",
CODEN = "CGFODY",
DOI = "https://doi.org/10.1111/cgf.12103",
ISSN = "0167-7055 (print), 1467-8659 (electronic)",
ISSN-L = "0167-7055",
bibdate = "Sat Feb 8 15:27:43 MST 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/cgf.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Computer Graphics Forum",
journal-URL = "http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1467-8659/",
onlinedate = "1 Jul 2013",
}
@Article{Sampaio:2013:DA,
author = "Diogo Sampaio and Rafael Martins de Souza and Sylvain
Collange and Fernando Magno Quint{\~a}o Pereira",
title = "Divergence analysis",
journal = j-TOPLAS,
volume = "35",
number = "4",
pages = "13:1--13:??",
month = dec,
year = "2013",
CODEN = "ATPSDT",
DOI = "https://doi.org/10.1145/2523815",
ISSN = "0164-0925 (print), 1558-4593 (electronic)",
ISSN-L = "0164-0925",
bibdate = "Tue Dec 31 14:22:03 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/toplas/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/toplas.bib",
abstract = "Growing interest in graphics processing units has
brought renewed attention to the Single Instruction
Multiple Data (SIMD) execution model. SIMD machines
give application developers tremendous computational
power; however, programming them is still challenging.
In particular, developers must deal with memory and
control-flow divergences. These phenomena stem from a
condition that we call data divergence, which occurs
whenever two processing elements (PEs) see the same
variable name holding different values. This article
introduces divergence analysis, a static analysis that
discovers data divergences. This analysis, currently
deployed in an industrial quality compiler, is useful
in several ways: it improves the translation of SIMD
code to non-SIMD CPUs, it helps developers to manually
improve their SIMD applications, and it also guides the
automatic optimization of SIMD programs. We demonstrate
this last point by introducing the notion of a
divergence-aware register spiller. This spiller uses
information from our analysis to either rematerialize
or share common data between PEs. As a testimony of its
effectiveness, we have tested it on a suite of 395 CUDA
kernels from well-known benchmarks. The
divergence-aware spiller produces GPU code that is
26.21\% faster than the code produced by the register
allocator used in the baseline compiler.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Programming Languages and
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J783",
}
@Article{Shen:2013:ACE,
author = "Jie Shen and Jianbin Fang and Henk Sips and Ana Lucia
Varbanescu",
title = "An application-centric evaluation of {OpenCL} on
multi-core {CPUs}",
journal = j-PARALLEL-COMPUTING,
volume = "39",
number = "12",
pages = "834--850",
month = dec,
year = "2013",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Tue Dec 3 18:06:48 MST 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819113001014",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{SM-D:2013:BRC,
author = "SM-D",
title = "Book Review: {{\booktitle{CUDA Programming}}, Shane
Cook. Morgan Kaufmann. ISBN 978-0-12-415933-4}",
journal = j-NETWORK-SECURITY,
volume = "2013",
number = "1",
pages = "4--4",
month = jan,
year = "2013",
CODEN = "NTSCF5",
DOI = "https://doi.org/10.1016/S1353-4858(13)70015-1",
ISSN = "1353-4858 (print), 1872-9371 (electronic)",
ISSN-L = "1353-4858",
bibdate = "Mon Dec 4 17:00:50 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/network-security.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S1353485813700151",
acknowledgement = ack-nhfb,
fjournal = "Network Security",
journal-URL = "https://www.sciencedirect.com/journal/network-security",
}
@Article{Totoni:2013:EFE,
author = "Ehsan Totoni and Mert Dikmen and Mar{\'\i}a Jes{\'u}s
Garzar{\'a}n",
title = "Easy, fast, and energy-efficient object detection on
heterogeneous on-chip architectures",
journal = j-TACO,
volume = "10",
number = "4",
pages = "45:1--45:??",
month = dec,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2541228.2555302",
ISSN = "1544-3566 (print), 1544-3973 (electronic)",
ISSN-L = "1544-3566",
bibdate = "Thu Jan 9 10:42:35 MST 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/taco.bib",
abstract = "We optimize a visual object detection application
(that uses Vision Video Library kernels) and show that
OpenCL is a unified programming paradigm that can
provide high performance when running on the Ivy Bridge
heterogeneous on-chip architecture. We evaluate
different mapping techniques and show that running each
kernel where it fits the best and using software
pipelining can provide 1.91 times higher performance
and 42\% better energy efficiency. We also show how to
trade accuracy for energy at runtime. Overall, our
application can perform accurate object detection at 40
frames per second (fps) in an energy-efficient
manner.",
acknowledgement = ack-nhfb,
articleno = "45",
fjournal = "ACM Transactions on Architecture and Code Optimization
(TACO)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Vaidya:2013:SDO,
author = "Aniruddha S. Vaidya and Anahita Shayesteh and Dong
Hyuk Woo and Roy Saharoy and Mani Azimi",
title = "{SIMD} divergence optimization through intra-warp
compaction",
journal = j-COMP-ARCH-NEWS,
volume = "41",
number = "3",
pages = "368--379",
month = jun,
year = "2013",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2508148.2485954",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Sat Jul 27 06:58:55 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
note = "ISCA '13 conference proceedings.",
abstract = "SIMD execution units in GPUs are increasingly used for
high performance and energy efficient acceleration of
general purpose applications. However, SIMD control
flow divergence effects can result in reduced execution
efficiency in a class of GPGPU applications, classified
as divergent applications. Improving SIMD efficiency,
therefore, has the potential to bring significant
performance and energy benefits to a wide range of such
data parallel applications. Recently, the SIMD
divergence problem has received increased attention,
and several micro-architectural techniques have been
proposed to address various aspects of this problem.
However, these techniques are often quite complex and,
therefore, unlikely candidates for practical
implementation. In this paper, we propose two
micro-architectural optimizations for GPGPU
architectures, which utilize relatively simple
execution cycle compression techniques when certain
groups of turned-off lanes exist in the instruction
stream. We refer to these optimizations as basic cycle
compression (BCC) and swizzled-cycle compression (SCC),
respectively. In this paper, we will outline the
additional requirements for implementing these
optimizations in the context of the studied GPGPU
architecture. Our evaluations with divergent SIMD
workloads from OpenCL (GPGPU) and OpenGL (graphics)
applications show that BCC and SCC reduce execution
cycles in divergent applications by as much as 42\%
(20\% on average). For a subset of divergent workloads,
the execution time is reduced by an average of 7\% for
today's GPUs or by 18\% for future GPUs with a better
provisioned memory subsystem. The key contribution of
our work is in simplifying the micro-architecture for
delivering divergence optimizations while providing the
bulk of the benefits of more complex approaches.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
}
@Article{Vogel:2013:BWC,
author = "Thomas Vogel",
title = "{{\booktitle{All the Way to CUDA}}} [Book review]",
journal = j-COMPUT-SCI-ENG,
volume = "15",
number = "5",
pages = "6--8",
month = sep # "\slash " # oct,
year = "2013",
CODEN = "CSENFA",
DOI = "https://doi.org/10.1109/MCSE.2013.101",
ISSN = "1521-9615",
ISSN-L = "1521-9615",
bibdate = "Sat Apr 19 10:17:39 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Computing in Science and Engineering",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}
@Article{Wang:2013:PMO,
author = "Cheng Wang and Sunita Chandrasekaran and Peng Sun and
Barbara Chapman and Jim Holt",
title = "Portable mapping of {OpenMP} to multicore embedded
systems using {MCA APIs}",
journal = j-SIGPLAN,
volume = "48",
number = "5",
pages = "153--162",
month = may,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2499369.2465569",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Jul 1 17:15:32 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Multicore embedded systems are being widely used in
telecommunication systems, robotics, medical
applications and more. While they offer a
high-performance with low-power solution, programming
in an efficient way is still a challenge. In order to
exploit the capabilities that the hardware offers,
software developers are expected to handle many of the
low-level details of programming including utilizing
DMA, ensuring cache coherency, and inserting
synchronization primitives explicitly. The
state-of-the-art involves solutions where the software
toolchain is too vendor-specific thus tying the
software to a particular hardware leaving no room for
portability. In this paper we present a runtime system
to explore mapping a high-level programming model,
OpenMP, on to multicore embedded systems. A key feature
of our scheme is that unlike the existing approaches
that largely rely on POSIX threads, our approach
leverages the Multicore Association (MCA) APIs as an
OpenMP translation layer. The MCA APIs is a set of
low-level APIs handling resource management,
inter-process communications and task scheduling for
multicore embedded systems. By deploying the MCA APIs,
our runtime is able to effectively capture the
characteristics of multicore embedded systems compared
with the POSIX threads. Furthermore, the MCA layer
enables our runtime implementation to be portable
across various architectures. Thus programmers only
need to maintain a single OpenMP code base which is
compatible by various compilers, while on the other
hand, the code is portable across different possible
types of platforms. We have evaluated our runtime
system using several embedded benchmarks. The
experiments demonstrate promising and competitive
performance compared to the native approach for the
platform.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "LCTES '13 conference proceedings.",
}
@Article{Wu:2013:PMH,
author = "Xingfu Wu and Valerie Taylor",
title = "Performance modeling of hybrid {MPI\slash OpenMP}
scientific applications on large-scale multicore
supercomputers",
journal = j-J-COMP-SYS-SCI,
volume = "79",
number = "8",
pages = "1256--1268",
month = dec,
year = "2013",
CODEN = "JCSSBM",
DOI = "https://doi.org/10.1016/j.jcss.2013.02.005",
ISSN = "0022-0000 (print), 1090-2724 (electronic)",
ISSN-L = "0022-0000",
bibdate = "Tue Jan 29 15:27:23 MST 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jcompsyssci.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0022000013000639",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computer and System Sciences",
journal-URL = "http://www.sciencedirect.com/science/journal/00220000",
}
@Article{Xu:2013:PMO,
author = "Shiming Xu and Wei Xue and Hai Xiang Lin",
title = "Performance modeling and optimization of sparse
matrix-vector multiplication on {NVIDIA CUDA}
platform",
journal = j-J-SUPERCOMPUTING,
volume = "63",
number = "3",
pages = "710--721",
month = mar,
year = "2013",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-011-0626-0",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Mon Apr 1 14:50:47 MDT 2013",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=63&issue=3;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s11227-011-0626-0;
http://link.springer.com/content/pdf/10.1007/s11227-011-0626-0",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Yan:2013:SFS,
author = "Shengen Yan and Guoping Long and Yunquan Zhang",
title = "{StreamScan}: fast scan algorithms for {GPUs} without
global barrier synchronization",
journal = j-SIGPLAN,
volume = "48",
number = "8",
pages = "229--238",
month = aug,
year = "2013",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2517327.2442539",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Mon Aug 26 13:48:51 MDT 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
note = "PPoPP '13 conference proceedings.",
abstract = "Scan (also known as prefix sum) is a very useful
primitive for various important parallel algorithms,
such as sort, BFS, SpMV, compaction and so on. Current
state of the art of GPU based scan implementation
consists of three consecutive Reduce-Scan-Scan phases.
This approach requires at least two global barriers and
3N (N is the problem size) global memory accesses. In
this paper we propose StreamScan, a novel approach to
implement scan on GPUs with only one computation phase.
The main idea is to restrict synchronization to only
adjacent workgroups, and thereby eliminating global
barrier synchronization completely. The new approach
requires only 2N global memory accesses and just one
kernel invocation. On top of this we propose two
important optimizations to further boost performance
speedups, namely thread grouping to eliminate
unnecessary local barriers, and register optimization
to expand the on chip problem size. We designed an
auto-tuning framework to search the parameter space
automatically to generate highly optimized codes for
both AMD and Nvidia GPUs. We implemented our technique
with OpenCL. Compared with previous fast scan
implementations, experimental results not only show
promising performance speedups, but also reveal
dramatic different optimization tradeoffs between
Nvidia and AMD GPU platforms.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
}
@Article{Yu:2013:AGA,
author = "Zhibin Yu and Lieven Eeckhout and Nilanjan Goswami and
Tao Li and Lizy John and Hai Jin and Chengzhong Xu",
title = "Accelerating {GPGPU} architecture simulation",
journal = j-SIGMETRICS,
volume = "41",
number = "1",
pages = "331--332",
month = jun,
year = "2013",
CODEN = "????",
DOI = "https://doi.org/10.1145/2494232.2465540",
ISSN = "0163-5999 (print), 1557-9484 (electronic)",
ISSN-L = "0163-5999",
bibdate = "Fri Feb 28 06:09:59 MST 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigmetrics.bib",
abstract = "Recently, graphics processing units (GPUs) have opened
up new opportunities for speeding up general-purpose
parallel applications due to their massive
computational power and up to hundreds of thousands of
threads enabled by programming models such as CUDA.
However, due to the serial nature of existing
micro-architecture simulators, these massively parallel
architectures and workloads need to be simulated
sequentially. As a result, simulating GPGPU
architectures with typical benchmarks and input data
sets is extremely time-consuming. This paper addresses
the GPGPU architecture simulation challenge by
generating miniature, yet representative GPGPU kernels.
We first summarize the static characteristics of an
existing GPGPU kernel in a profile, and analyze its
dynamic behavior using the novel concept of the
divergence flow statistics graph (DFSG). We
subsequently use a GPGPU kernel synthesizing framework
to generate a miniature proxy of the original kernel,
which can reduce simulation time significantly. The key
idea is to reduce the number of simulated instructions
by decreasing per-thread iteration counts of loops. Our
experimental results show that our approach can
accelerate GPGPU architecture simulation by a factor of
88X on average and up to 589X with an average IPC
relative error of 5.6\%.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGMETRICS Performance Evaluation Review",
journal-URL = "http://portal.acm.org/toc.cfm?id=J618",
}
@Article{Zhang:2013:MPI,
author = "Xiaohua Zhang and Sergio E. Wong and Felice C.
Lightstone",
title = "Message passing interface and multithreading hybrid
for parallel molecular docking of large databases on
petascale high performance computing machines",
journal = j-J-COMPUT-CHEM,
volume = "34",
number = "11",
pages = "915--927",
day = "30",
month = apr,
year = "2013",
CODEN = "JCCHDD",
DOI = "https://doi.org/10.1002/jcc.23214",
ISSN = "0192-8651 (print), 1096-987X (electronic)",
ISSN-L = "0192-8651",
bibdate = "Mon Apr 1 14:26:54 MDT 2013",
bibsource = "http://www.interscience.wiley.com/jpages/0192-8651;
http://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www3.interscience.wiley.com/journalfinder.html",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computational Chemistry",
journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1096-987X",
onlinedate = "23 Jan 2013",
}
@Article{Amritkar:2014:EPC,
author = "Amit Amritkar and Surya Deb and Danesh Tafti",
title = "Efficient parallel {CFD-DEM} simulations using
{OpenMP}",
journal = j-J-COMPUT-PHYS,
volume = "256",
number = "??",
pages = "501--519",
day = "1",
month = jan,
year = "2014",
CODEN = "JCTPAH",
ISSN = "0021-9991 (print), 1090-2716 (electronic)",
ISSN-L = "0021-9991",
bibdate = "Wed Nov 13 14:21:07 MST 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0021999113006128",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computational Physics",
journal-URL = "http://www.sciencedirect.com/science/journal/00219991",
}
@Article{Antonelli:2014:ATS,
author = "Laura Antonelli and Stefania Corsaro and Zelda Marino
and Mariarosaria Rizzardi",
title = "Algorithm 944: {Talbot} Suite: Parallel
Implementations of {Talbot}'s Method for the Numerical
Inversion of {Laplace} Transforms",
journal = j-TOMS,
volume = "40",
number = "4",
pages = "29:1--29:18",
month = jun,
year = "2014",
CODEN = "ACMSCU",
DOI = "https://doi.org/10.1145/2616909",
ISSN = "0098-3500 (print), 1557-7295 (electronic)",
ISSN-L = "0098-3500",
bibdate = "Wed Jul 2 18:28:58 MDT 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/toms.bib",
abstract = "We present Talbot Suite, a C parallel software
collection for the numerical inversion of Laplace
Transforms, based on Talbot's method. It is designed to
fit both single and multiple Laplace inversion
problems, which arise in several application and
research fields. In our software, we achieve high
accuracy and efficiency, making full use of modern
architectures and introducing two different levels of
parallelism: coarse and fine grained parallelism. They
offer a reasonable tradeoff between accuracy, the main
aspect for a few inversions, and efficiency, the main
aspect for multiple inversions. To take into account
modern high-performance computing architectures, Talbot
Suite provides different software versions: an
OpenMP-based version for shared memory machines and a
MPI-based version for distributed memory machines.
Moreover, oriented to hybrid architectures, a combined
MPI/OpenMP-based implementation is provided too. We
describe our parallel algorithms and the software
organization. We also report some performance results.
Our software includes sample programs to call the
Talbot Suite functions from C and from MATLAB.",
acknowledgement = ack-nhfb,
articleno = "29",
fjournal = "ACM Transactions on Mathematical Software (TOMS)",
journal-URL = "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Awile:2014:PWF,
author = "Omar Awile and Ivo F. Sbalzarini",
title = "A {Pthreads} Wrapper for {Fortran 2003}",
journal = j-TOMS,
volume = "40",
number = "3",
pages = "19:1--19:15",
month = apr,
year = "2014",
CODEN = "ACMSCU",
DOI = "https://doi.org/10.1145/2558889",
ISSN = "0098-3500 (print), 1557-7295 (electronic)",
ISSN-L = "0098-3500",
bibdate = "Mon Apr 21 17:42:14 MDT 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/toms.bib",
abstract = "With the advent of multicore processors, numerical and
mathematical software relies on parallelism in order to
benefit from hardware performance increases. We present
the design and use of a Fortran 2003 wrapper for POSIX
threads, called forthreads. Forthreads is complete in
the sense that it provides native Fortran 2003
interfaces to all pthreads routines where possible. We
demonstrate the use and efficiency of forthreads for
SIMD parallelism and task parallelism. We present
forthreads/MPI implementations that enable hybrid
shared-/distributed-memory parallelism in Fortran 2003.
Our benchmarks show that forthreads offers performance
comparable to that of OpenMP, but better thread control
and more freedom. We demonstrate the latter by
presenting a multithreaded Fortran 2003 library for
POSIX Internet sockets, enabling interactive numerical
simulations with runtime control.",
acknowledgement = ack-nhfb,
articleno = "19",
fjournal = "ACM Transactions on Mathematical Software (TOMS)",
journal-URL = "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Barrett:2014:EMM,
author = "Brian W. Barrett and Ron Brightwell and Ryan Grant and
Simon D. Hammond and K. Scott Hemmert",
title = "An evaluation of {MPI} message rate on hybrid-core
processors",
journal = j-IJHPCA,
volume = "28",
number = "4",
pages = "415--424",
month = nov,
year = "2014",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342014552085",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Feb 13 09:17:23 MST 2015",
bibsource = "http://hpc.sagepub.com/content/28/4.toc;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/28/4/415",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Beaugnon:2014:VVO,
author = "Ulysse Beaugnon and Alexey Kravets and Sven van
Haastregt and Riyadh Baghdadi and David Tweed and Javed
Absar and Anton Lokhmotov",
title = "{VOBLA}: a vehicle for optimized basic linear
algebra",
journal = j-SIGPLAN,
volume = "49",
number = "5",
pages = "115--124",
month = may,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2666357.2597818",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Fri Sep 26 07:37:30 MDT 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present VOBLA, a domain-specific language designed
for programming linear algebra libraries. VOBLA is
compiled to PENCIL, a domain independent intermediate
language designed for efficient mapping to accelerator
architectures such as GPGPUs. PENCIL is compiled to
efficient, platform-specific OpenCL code using
techniques based on the polyhedral model. This approach
addresses both the programmer productivity and
performance portability concerns associated with
accelerator programming. We demonstrate our approach by
using VOBLA to implement a BLAS library. We have
evaluated the performance of OpenCL code generated
using our compilation flow on ARM Mali, AMD Radeon, and
AMD Opteron platforms. The generated code is currently
on average 1.9x slower than highly hand-optimized
OpenCL code, but on average 8.1x faster than
straightforward OpenCL code. Given that the VOBLA
coding takes significantly less effort compared to
hand-optimizing OpenCL code, we believe our approach
leads to improved productivity and performance
portability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "LCTES '14 conference proceedings.",
}
@Article{Bethune:2014:PAA,
author = "Iain Bethune and J. Mark Bull and Nicholas J. Dingle
and Nicholas J. Higham",
title = "Performance analysis of asynchronous {Jacobi}'s method
implemented in {MPI}, {SHMEM} and {OpenMP}",
journal = j-IJHPCA,
volume = "28",
number = "1",
pages = "97--111",
month = feb,
year = "2014",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342013493123",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Mar 14 15:39:59 MDT 2014",
bibsource = "http://hpc.sagepub.com/content/28/1.toc;
http://www.math.utah.edu/pub/bibnet/authors/h/higham-nicholas-john.bib;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/28/1/97.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
onlinedate = "July 11, 2013",
}
@Article{Blas:2014:RAM,
author = "Javier Garcia Blas and Jesus Carretero",
title = "Recent advances in the {Message Passing Interface}",
journal = j-IJHPCA,
volume = "28",
number = "4",
pages = "387--389",
month = nov,
year = "2014",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342014549273",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Feb 13 09:17:23 MST 2015",
bibsource = "http://hpc.sagepub.com/content/28/4.toc;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/28/4/387",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Coole:2014:FFH,
author = "James Coole and Greg Stitt",
title = "Fast, Flexible High-Level Synthesis from {OpenCL}
using Reconfiguration Contexts",
journal = j-IEEE-MICRO,
volume = "34",
number = "1",
pages = "42--53",
month = jan # "\slash " # feb,
year = "2014",
CODEN = "IEMIDZ",
DOI = "https://doi.org/10.1109/MM.2013.108",
ISSN = "0272-1732",
ISSN-L = "0272-1732",
bibdate = "Thu Aug 21 08:02:34 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Micro",
journal-URL = "http://www.computer.org/csdl/mags/mi/index.html",
}
@Article{Cores:2014:FAM,
author = "Iv{\'a}n Cores and Gabriel Rodr{\'\i}guez and Patricia
Gonz{\'a}lez and Mar{\'\i}a J. Mart{\'\i}n",
title = "Failure Avoidance in {MPI} Applications Using an
Application-Level Approach",
journal = j-COMP-J,
volume = "57",
number = "1",
pages = "100--114",
month = jan,
year = "2014",
CODEN = "CMPJA6",
DOI = "https://doi.org/10.1093/comjnl/bxs158",
ISSN = "0010-4620 (print), 1460-2067 (electronic)",
ISSN-L = "0010-4620",
bibdate = "Mon Feb 3 17:02:40 MST 2014",
bibsource = "http://comjnl.oxfordjournals.org/content/57/1.toc;
http://www.math.utah.edu/pub/tex/bib/compj2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://comjnl.oxfordjournals.org/content/57/1/100.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "Computer Journal",
journal-URL = "http://comjnl.oxfordjournals.org/",
onlinedate = "December 18, 2012",
}
@Article{Cores:2014:MAL,
author = "Iv{\'a}n Cores and Gabriel Rodr{\'\i}guez and
Mar{\'\i}a J. Mart{\'\i}n",
title = "In-memory application-level checkpoint-based migration
for {MPI} programs",
journal = j-J-SUPERCOMPUTING,
volume = "70",
number = "2",
pages = "660--670",
month = nov,
year = "2014",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-014-1120-2",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Fri Feb 13 12:32:19 MST 2015",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=70&issue=2;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s11227-014-1120-2",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Cunningham:2014:RXE,
author = "David Cunningham and David Grove and Benjamin Herta
and Arun Iyengar and Kiyokuni Kawachiya and Hiroki
Murata and Vijay Saraswat and Mikio Takeuchi and
Olivier Tardieu",
title = "Resilient {X10}: efficient failure-aware programming",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "67--80",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555248",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Scale-out programs run on multiple processes in a
cluster. In scale-out systems, processes can fail.
Computations using traditional libraries such as MPI
fail when any component process fails. The advent of
Map Reduce, Resilient Data Sets and MillWheel has shown
dramatic improvements in productivity are possible when
a high-level programming framework handles scale-out
and resilience automatically. We are concerned with the
development of general-purpose languages that support
resilient programming. In this paper we show how the
X10 language and implementation can be extended to
support resilience. In Resilient X10, places may fail
asynchronously, causing loss of the data and tasks at
the failed place. Failure is exposed through
exceptions. We identify a {\em Happens Before
Invariance Principle} and require the runtime to
automatically repair the global control structure of
the program to maintain this principle. We show this
reduces much of the burden of resilient programming.
The programmer is only responsible for continuing
execution with fewer computational resources and the
loss of part of the heap, and can do so while taking
advantage of domain knowledge. We build a complete
implementation of the language, capable of executing
benchmark applications on hundreds of nodes. We
describe the algorithms required to make the language
runtime resilient. We then give three applications,
each with a different approach to fault tolerance
(replay, decimation, and domain-level checkpointing).
These can be executed at scale and survive node
failure. We show that for these programs the overhead
of resilience is a small fraction of overall runtime by
comparing to equivalent non-resilient X10 programs. On
one program we show end-to-end performance of Resilient
X10 is $\sim$100x faster than Hadoop.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PPoPP '14 conference proceedings.",
}
@Article{DAgostino:2014:CAM,
author = "Daniele D'Agostino and Andrea Clematis and Sergio
Decherchi and Walter Rocchia and Luciano Milanesi and
Ivan Merelli",
title = "{CUDA} accelerated molecular surface generation",
journal = j-CCPE,
volume = "26",
number = "10",
pages = "1819--1831",
month = jul,
year = "2014",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.3120",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Tue Sep 9 16:46:30 MDT 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "15 Aug 2013",
}
@Article{Didelot:2014:IMC,
author = "Sylvain Didelot and Patrick Carribault and Marc
P{\'e}rache and William Jalby",
title = "Improving {MPI} communication overlap with
collaborative polling",
journal = j-COMPUTING,
volume = "96",
number = "4",
pages = "263--278",
month = apr,
year = "2014",
CODEN = "CMPTA2",
DOI = "https://doi.org/10.1007/s00607-013-0327-z",
ISSN = "0010-485X (print), 1436-5057 (electronic)",
ISSN-L = "0010-485X",
bibdate = "Fri Jun 6 10:07:21 MDT 2014",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0010-485X&volume=96&issue=4;
http://www.math.utah.edu/pub/tex/bib/computing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s00607-013-0327-z",
acknowledgement = ack-nhfb,
fjournal = "Computing",
journal-URL = "http://link.springer.com/journal/607",
}
@Article{Dinan:2014:ECC,
author = "James Dinan and Ryan E. Grant and Pavan Balaji and
David Goodell and Douglas Miller and Marc Snir and
Rajeev Thakur",
title = "Enabling communication concurrency through flexible
{MPI} endpoints",
journal = j-IJHPCA,
volume = "28",
number = "4",
pages = "390--405",
month = nov,
year = "2014",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342014548772",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Feb 13 09:17:23 MST 2015",
bibsource = "http://hpc.sagepub.com/content/28/4.toc;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/28/4/390",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
onlinedate = "September 23, 2014",
}
@Article{DiPierro:2014:PPP,
author = "Massimo {Di Pierro}",
title = "Portable Parallel Programs with {Python} and
{OpenCL}",
journal = j-COMPUT-SCI-ENG,
volume = "16",
number = "1",
pages = "34--40",
month = jan # "\slash " # feb,
year = "2014",
CODEN = "CSENFA",
DOI = "https://doi.org/10.1109/MCSE.2013.99",
ISSN = "1521-9615",
ISSN-L = "1521-9615",
bibdate = "Sat Apr 19 10:17:39 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/python.bib",
acknowledgement = ack-nhfb,
fjournal = "Computing in Science and Engineering",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}
@Article{Fang:2014:API,
author = "Jianbin Fang and Henk Sips and Ana Lucia Varbanescu",
title = "{Aristotle}: A performance impact indicator for the
{OpenCL} kernels using local memory",
journal = j-SCI-PROG,
volume = "22",
number = "3",
pages = "239--257",
month = "????",
year = "2014",
CODEN = "SCIPEV",
DOI = "https://doi.org/10.3233/SPR-140390",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Tue Sep 9 18:01:15 MDT 2014",
bibsource = "http://www.iospress.nl/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sciprog.bib",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "http://iospress.metapress.com/content/1058-9244",
}
@InProceedings{Feng:2014:MSP,
author = "Chunsheng Feng and Shi Shu and Jinchao Xu and
Chen-Song Zhang",
title = "A Multi-Stage Preconditioner for the Black Oil Model
and Its {OpenMP} Implementation",
crossref = "Erhel:2014:DDM",
volume = "98",
pages = "141--153",
year = "2014",
DOI = "https://doi.org/10.1007/978-3-319-05789-7_11",
bibdate = "Sat Dec 12 10:22:13 MST 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncse.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-319-05789-7_11/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-319-05789-7",
book-URL = "http://www.springerlink.com/content/978-3-319-05789-7",
}
@Article{Feng:2014:SBS,
author = "Xiaowen Feng and Hai Jin and Ran Zheng and Zhiyuan
Shao and Lei Zhu",
title = "A segment-based sparse matrix--vector multiplication
on {CUDA}",
journal = j-CCPE,
volume = "26",
number = "1",
pages = "271--286",
month = jan,
year = "2014",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.2978",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Sat Feb 8 15:45:08 MST 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "7 Dec 2012",
}
@Article{Gerstenberger:2014:EHS,
  author =       "Robert Gerstenberger and Maciej Besta and Torsten
                 Hoefler",
  title =        "Enabling highly-scalable remote memory access
                 programming with {MPI-3 One Sided}",
  journal =      j-SCI-PROG,
  volume =       "22",
  number =       "2",
  pages =        "75--91",
  month =        "????",
  year =         "2014",
  CODEN =        "SCIPEV",
  DOI =          "https://doi.org/10.3233/SPR-140383",
  ISSN =         "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L =       "1058-9244",
  bibdate =      "Tue Sep 9 18:01:01 MDT 2014",
  bibsource =    "http://www.iospress.nl/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sciprog.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Scientific Programming",
  journal-URL =  "http://iospress.metapress.com/content/1058-9244",
}
@Article{Gonina:2014:SMC,
  author =       "Ekaterina Gonina and Gerald Friedland and Eric
                 Battenberg and Penporn Koanantakool and Michael
                 Driscoll and Evangelos Georganas and Kurt Keutzer",
  title =        "Scalable multimedia content analysis on parallel
                 platforms using {Python}",
  journal =      j-TOMCCAP,
  volume =       "10",
  number =       "2",
  pages =        "18:1--18:??",
  month =        feb,
  year =         "2014",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2517151",
  ISSN =         "1551-6857 (print), 1551-6865 (electronic)",
  ISSN-L =       "1551-6857",
  bibdate =      "Thu Mar 13 07:37:57 MDT 2014",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tomccap/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/python.bib;
                 http://www.math.utah.edu/pub/tex/bib/tomccap.bib",
  abstract =     "In this new era dominated by consumer-produced media
                 there is a high demand for web-scalable solutions to
                 multimedia content analysis. A compelling approach to
                 making applications scalable is to explicitly map their
                 computation onto parallel platforms. However,
                 developing efficient parallel implementations and fully
                 utilizing the available resources remains a challenge
                 due to the increased code complexity, limited
                 portability and required low-level knowledge of the
                 underlying hardware. In this article, we present
                 PyCASP, a Python-based framework that automatically
                 maps computation onto parallel platforms from Python
                 application code to a variety of parallel platforms.
                 PyCASP is designed using a systematic, pattern-oriented
                 approach to offer a single software development
                 environment for multimedia content analysis
                 applications. Using PyCASP, applications can be
                 prototyped in a couple hundred lines of Python code and
                 automatically scale to modern parallel processors.
                 Applications written with PyCASP are portable to a
                 variety of parallel platforms and efficiently scale
                 from a single desktop Graphics Processing Unit (GPU) to
                 an entire cluster with a small change to application
                 code. To illustrate our approach, we present three
                 multimedia content analysis applications that use our
                 framework: a state-of-the-art speaker diarization
                 application, a content-based music recommendation
                 system based on the Million Song Dataset, and a video
                 event detection system for consumer-produced videos. We
                 show that across this wide range of applications, our
                 approach achieves the goal of automatic portability and
                 scalability while at the same time allowing easy
                 prototyping in a high-level language and efficient
                 performance of low-level optimized code.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Multimedia Computing,
                 Communications, and Applications",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J961",
}
@Article{Guerrero:2014:PCM,
  author =       "Gin{\'e}s D. Guerrero and Richard M. Wallace and
                 Jos{\'e} L. V{\'a}zquez-Poletti and Jos{\'e} M. Cecilia
                 and Jos{\'e} M. Garc{\'\i}a and Daniel Mozos and
                 Horacio P{\'e}rez-S{\'a}nchez",
  title =        "A performance\slash cost model for a {CUDA} drug
                 discovery application on physical and public cloud
                 infrastructures",
  journal =      j-CCPE,
  volume =       "26",
  number =       "10",
  pages =        "1787--1798",
  month =        jul,
  year =         "2014",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3117",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Tue Sep 9 16:46:30 MDT 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "14 Aug 2013",
}
@Article{Hall:2014:MMC,
  author =       "Clifford Hall and Weixiao Ji and Estela
                 Blaisten-Barojas",
  title =        "The {Metropolis Monte Carlo} method with {CUDA}
                 enabled {Graphic Processing Units}",
  journal =      j-J-COMPUT-PHYS,
  volume =       "258",
  number =       "??",
  pages =        "871--879",
  day =          "1",
  month =        feb,
  year =         "2014",
  CODEN =        "JCTPAH",
  ISSN =         "0021-9991 (print), 1090-2716 (electronic)",
  ISSN-L =       "0021-9991",
  bibdate =      "Mon Dec 23 10:39:12 MST 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0021999113007626",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Physics",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00219991/",
}
@Book{Hanson:2014:NCM,
  author =       "Richard J. Hanson and Tim Hopkins",
  title =        "Numerical computing with modern {Fortran}",
  publisher =    pub-SIAM,
  address =      pub-SIAM:adr,
  pages =        "xv + 244",
  year =         "2014",
  ISBN =         "1-61197-311-2 (paperback), 1-61197-312-0 (e-book)",
  ISBN-13 =      "978-1-61197-311-2 (paperback), 978-1-61197-312-9
                 (e-book)",
  LCCN =         "QA76.73.F25 H367 2013",
  bibdate =      "Wed Mar 12 11:09:16 MDT 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 http://www.math.utah.edu/pub/tex/bib/numana2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 z3950.loc.gov:7090/Voyager",
  series =       "Applied mathematics",
  abstract =     "The Fortran language standard has undergone
                 significant upgrades in recent years (1990, 1995, 2003,
                 and 2008). \booktitle{Numerical Computing with Modern
                 Fortran} illustrates many of these improvements through
                 practical solutions to a number of scientific and
                 engineering problems. Readers will discover: techniques
                 for modernizing algorithms written in Fortran; examples
                 of Fortran interoperating with C or C++ programs, plus
                 using the IEEE floating-point standard for efficiency;
                 illustrations of parallel Fortran programming using
                 coarrays, MPI, and OpenMP; and a supplementary website
                 with downloadable source codes discussed in the book.",
  acknowledgement = ack-nhfb,
  subject =      "FORTRAN (Computer program language); Numerical
                 analysis; Computer programs; Science; Mathematics",
  tableofcontents = "Introduction \\
                 The modern Fortran source \\
                 Modules for subprogram libraries \\
                 Generic subprograms \\
                 Sparse matrices, defined operations, overloaded
                 assignment \\
                 Object-oriented programming for numerical applications
                 \\
                 Recursion in Fortran \\
                 Case study: toward a modern QUADPACK routine \\
                 Case study: quadrature routine qag2003 \\
                 IEEE arithmetic features and exception handling \\
                 Interoperability with C \\
                 Defined operations for sparse matrix solutions \\
                 Case study: two sparse least-squares system examples
                 \\
                 Message passing with MPI in standard Fortran \\
                 Coarrays in standard Fortran \\
                 OpenMP in Fortran \\
                 Modifying source to remove obsolescent or deleted
                 features \\
                 Software testing \\
                 Compilers \\
                 Software tools \\
                 Fortran book code on SIAM web site \\
                 Bibliography \\
                 Index",
}
@InProceedings{Haynes:2014:MOA,
  author =       "Ronald D. Haynes and Benjamin W. Ong",
  title =        "{MPI--OpenMP} Algorithms for the Parallel Space-Time
                 Solution of Time Dependent {PDEs}",
  crossref =     "Erhel:2014:DDM",
  volume =       "98",
  pages =        "179--187",
  year =         "2014",
  DOI =          "https://doi.org/10.1007/978-3-319-05789-7_14",
  bibdate =      "Sat Dec 12 10:22:13 MST 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/lncse.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/chapter/10.1007/978-3-319-05789-7_14/",
  acknowledgement = ack-nhfb,
  book-DOI =     "https://doi.org/10.1007/978-3-319-05789-7",
  book-URL =     "http://www.springerlink.com/content/978-3-319-05789-7",
}
@Article{Holmen:2014:ASI,
  author =       "John K. Holmen and David L. Foster",
  title =        "Accelerating Single Iteration Performance of
                 {CUDA}--Based {$3$D} Reaction--Diffusion Simulations",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "42",
  number =       "2",
  pages =        "343--363",
  month =        apr,
  year =         "2014",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-013-0251-z",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Thu Mar 13 19:25:13 MDT 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=42&issue=2;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "See erratum \cite{Holmen:2014:EAS}.",
  URL =          "http://link.springer.com/article/10.1007/s10766-013-0251-z",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Holmen:2014:EAS,
  author =       "John K. Holmen and David L. Foster",
  title =        "Erratum to: Accelerating Single Iteration Performance
                 of {CUDA}--Based {$3$D} Reaction--Diffusion
                 Simulations",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "42",
  number =       "2",
  pages =        "364--364",
  month =        apr,
  year =         "2014",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-014-0305-x",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Thu Mar 13 19:25:13 MDT 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=42&issue=2;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "See \cite{Holmen:2014:ASI}.",
  URL =          "http://link.springer.com/content/pdf/10.1007/s10766-014-0305-x.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Jenkins:2014:PMD,
  author =       "John Jenkins and James Dinan and Pavan Balaji and Tom
                 Peterka and Nagiza F. Samatova and Rajeev Thakur",
  title =        "Processing {MPI} Derived Datatypes on Noncontiguous
                 {GPU}-Resident Data",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "25",
  number =       "10",
  pages =        "2627--2637",
  month =        oct,
  year =         "2014",
  CODEN =        "ITDSEO",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Feb 12 13:58:32 MST 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.computer.org/csdl/trans/td/2014/10/06600679-abs.html",
  abstract-URL = "http://www.computer.org/csdl/trans/td/2014/10/06600679-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Jie:2014:ASP,
  author =       "Liang Jie and KenLi Li and Lin Shi and RangSu Liu and
                 Jing Mei",
  title =        "Accelerating solidification process simulation for
                 large-sized system of liquid metal atoms using {GPU}
                 with {CUDA}",
  journal =      j-J-COMPUT-PHYS,
  volume =       "257",
  number =       "??",
  pages =        "521--535",
  day =          "15",
  month =        jan,
  year =         "2014",
  CODEN =        "JCTPAH",
  ISSN =         "0021-9991 (print), 1090-2716 (electronic)",
  ISSN-L =       "0021-9991",
  bibdate =      "Sat Nov 30 14:26:13 MST 2013",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0021999113006803",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Physics",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00219991/",
}
@Article{Joldes:2014:SSH,
  author =       "Mioara Joldes and Valentina Popescu and Warwick
                 Tucker",
  title =        "Searching for Sinks for the {H{\'e}non} Map using a
                 Multiple-precision {GPU} Arithmetic Library",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "63--68",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693726",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:35 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Today, GPUs represent an important hardware
                 development platform for many problems in dynamical
                 systems, where massive parallel computations are
                 needed. Beside that, many numerical studies of chaotic
                 dynamical systems require a computing precision higher
                 than common floating point (FP) formats. One such
                 application is locating invariant sets for chaotic
                 dynamical systems. In particular, we focus on
                 rigorously proving the existence of stable periodic
                 orbits for the H{\'e}non map for parameter values close
                 to the classical ones. For that, we present a
                 multiple-precision floating-point arithmetic library in
                 CUDA programming language for the NVIDIA GPU platform.
                 Our library extends the precision using so-called FP
                 expansions, where a number is represented as the
                 unevaluated sum of standard machine precision FP
                 numbers. This format offers the advantage of using
                 directly available and highly optimized hardware FP
                 operations. We generalize algorithms used by
                 multiple-precisions libraries such as Bailey's QD, or
                 the analogue GPU version, GQD.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Jung:2014:MCM,
  author =       "Jaewoon Jung and Takaharu Mori and Yuji Sugita",
  title =        "Midpoint cell method for hybrid {(MPI + OpenMP)}
                 parallelization of molecular dynamics simulations",
  journal =      j-J-COMPUT-CHEM,
  volume =       "35",
  number =       "14",
  pages =        "1064--1072",
  day =          "30",
  month =        may,
  year =         "2014",
  CODEN =        "JCCHDD",
  DOI =          "https://doi.org/10.1002/jcc.23591",
  ISSN =         "0192-8651 (print), 1096-987X (electronic)",
  ISSN-L =       "0192-8651",
  bibdate =      "Wed Aug 27 06:34:07 MDT 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Chemistry",
  journal-URL =  "http://www.interscience.wiley.com/jpages/0192-8651",
  onlinedate =   "23 Mar 2014",
}
@Article{Kamal:2014:IFG,
  author =       "Humaira Kamal and Alan Wagner",
  title =        "An integrated fine-grain runtime system for {MPI}",
  journal =      j-COMPUTING,
  volume =       "96",
  number =       "4",
  pages =        "293--309",
  month =        apr,
  year =         "2014",
  CODEN =        "CMPTA2",
  DOI =          "https://doi.org/10.1007/s00607-013-0329-x",
  ISSN =         "0010-485X (print), 1436-5057 (electronic)",
  ISSN-L =       "0010-485X",
  bibdate =      "Fri Jun 6 10:07:21 MDT 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0010-485X&volume=96&issue=4;
                 http://www.math.utah.edu/pub/tex/bib/computing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s00607-013-0329-x",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing",
  journal-URL =  "http://link.springer.com/journal/607",
}
@Article{Kim:2014:VVF,
  author =       "Young-Joo Kim and Sejun Song and Yong-Kee Jun",
  title =        "{VORD}: A Versatile On-the-fly Race Detection Tool in
                 {OpenMP} Programs",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "42",
  number =       "6",
  pages =        "900--930",
  month =        dec,
  year =         "2014",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-013-0257-6",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Wed Sep 10 07:13:09 MDT 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=42&issue=6;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s10766-013-0257-6",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Komura:2014:CPG,
  author =       "Yukihiro Komura and Yutaka Okabe",
  title =        "{CUDA} programs for the {GPU} computing of the
                 {Swendsen--Wang} multi-cluster spin flip algorithm:
                 {$2$D} and {$3$D} {Ising}, {Potts}, and {$ X Y $}
                 models",
  journal =      j-COMP-PHYS-COMM,
  volume =       "185",
  number =       "3",
  pages =        "1038--1043",
  month =        mar,
  year =         "2014",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Tue Feb 4 19:25:59 MST 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465513003743",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Kumar:2014:OMC,
  author =       "Sameer Kumar and Amith Mamidala and Philip
                 Heidelberger and Dong Chen and Daniel Faraj",
  title =        "Optimization of {MPI} collective operations on the
                 {IBM Blue Gene/Q} supercomputer",
  journal =      j-IJHPCA,
  volume =       "28",
  number =       "4",
  pages =        "450--464",
  month =        nov,
  year =         "2014",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342014552086",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Fri Feb 13 09:17:23 MST 2015",
  bibsource =    "http://hpc.sagepub.com/content/28/4.toc;
                 http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://hpc.sagepub.com/content/28/4/450",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}
@Article{Langr:2014:APP,
  author =       "Daniel Langr and Pavel Tvrd{\'\i}k and Tom{\'a}s
                 Dytrych and Jerry P. Draayer",
  title =        "{Algorithm 947}: {Paraperm} --- Parallel Generation of
                 Random Permutations with {MPI}",
  journal =      j-TOMS,
  volume =       "41",
  number =       "1",
  pages =        "5:1--5:26",
  month =        oct,
  year =         "2014",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/2669372",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Mon Oct 27 16:37:25 MDT 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/prng.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  abstract =     "An algorithm for parallel generation of a random
                 permutation of a large set of distinct integers is
                 presented. This algorithm is designed for massively
                 parallel systems with distributed memory architectures
                 and the MPI-based runtime environments. Scalability of
                 the algorithm is analyzed according to the memory and
                 communication requirements. An implementation of the
                 algorithm in a form of a software library based on the
                 C++ programming language and the MPI application
                 programming interface is further provided. Finally,
                 performed experiments are described and their results
                 discussed. The biggest of these experiments resulted in
                 a generation of a random permutation of $ 2^{41} $
                 integers in slightly more than four minutes using
                 131072 CPU cores.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{LaSalle:2014:MBD,
  author =       "Dominique LaSalle and George Karypis",
  title =        "{MPI} for Big Data: New tricks for an old dog",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "40",
  number =       "10",
  pages =        "754--767",
  month =        dec,
  year =         "2014",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Nov 24 12:48:48 MST 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819114000830",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}
@Article{Lee:2014:BCA,
  author =       "Changmin Lee and Won Woo Ro and Jean-Luc Gaudiot",
  title =        "Boosting {CUDA} Applications with {CPU--GPU} Hybrid
                 Computing",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "42",
  number =       "2",
  pages =        "384--404",
  month =        apr,
  year =         "2014",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-013-0252-y",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Thu Mar 13 19:25:13 MDT 2014",
  bibsource =    "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0885-7458&volume=42&issue=2;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s10766-013-0252-y",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Losada:2014:EAL,
  author =       "N. Losada and M. J. Mart{\'\i}n and G. Rodr{\'\i}guez
                 and P. Gonz{\'a}lez",
  title =        "Extending an Application-Level Checkpointing Tool to
                 Provide Fault Tolerance Support to {OpenMP}
                 Applications",
  journal =      j-J-UCS,
  volume =       "20",
  number =       "9",
  pages =        "1351--??",
  month =        "????",
  year =         "2014",
  CODEN =        "????",
  ISSN =         "0948-695X (print), 0948-6968 (electronic)",
  ISSN-L =       "0948-6968",
  bibdate =      "Fri Feb 13 11:25:50 MST 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jucs.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.jucs.org/jucs_20_9/extending_an_application_level",
  acknowledgement = ack-nhfb,
  fjournal =     "J.UCS: Journal of Universal Computer Science",
  journal-URL =  "http://www.jucs.org/jucs",
}
@Article{Luo:2014:ISM,
  author =       "Miao Luo and Xiaoyi Lu and Khaled Hamidouche and
                 Krishna Kandalla and Dhabaleswar K. Panda",
  title =        "Initial study of multi-endpoint runtime for {MPI +
                 OpenMP} hybrid programming model on multi-core
                 systems",
  journal =      j-SIGPLAN,
  volume =       "49",
  number =       "8",
  pages =        "395--396",
  month =        aug,
  year =         "2014",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2692916.2555287",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Nov 26 16:26:30 MST 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "State-of-the-art MPI libraries rely on locks to
                 guarantee thread-safety. This discourages application
                 developers from using multiple threads to perform MPI
                 operations. In this paper, we propose a high
                 performance, lock-free multi-endpoint MPI runtime,
                 which can achieve up to 40\% improvement for
                 point-to-point operation and one representative
                 collective operation with minimum or no modifications
                 to the existing applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '14 conference proceedings.",
}
@Article{Mitra:2014:AAP,
  author =       "Subrata Mitra and Ignacio Laguna and Dong H. Ahn and
                 Saurabh Bagchi and Martin Schulz and Todd Gamblin",
  title =        "Accurate application progress analysis for large-scale
                 parallel debugging",
  journal =      j-SIGPLAN,
  volume =       "49",
  number =       "6",
  pages =        "193--203",
  month =        jun,
  year =         "2014",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2666356.2594336",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Sep 26 07:38:28 MDT 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Debugging large-scale parallel applications is
                 challenging. In most HPC applications, parallel tasks
                 progress in a coordinated fashion, and thus a fault in
                 one task can quickly propagate to other tasks, making
                 it difficult to debug. Finding the least-progressed
                 tasks can significantly reduce the effort to identify
                 the task where the fault originated. However, existing
                 approaches for detecting them suffer low accuracy and
                 large overheads; either they use imprecise static
                 analysis or are unable to infer progress dependence
                 inside loops. We present a loop-aware
                 progress-dependence analysis tool, Prodometer, which
                 determines relative progress among parallel tasks via
                 dynamic analysis. Our fault-injection experiments
                 suggest that its accuracy and precision are over 90\%
                 for most cases and that it scales well up to 16,384 MPI
                 tasks. Further, our case study shows that it
                 significantly helped diagnosing a perplexing error in
                 MPI, which only manifested at large scale.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PLDI '14 conference proceedings.",
}
@Article{Molero-Armenta:2014:OOI,
  author =       "M. Molero-Armenta and Ursula Iturrar{\'a}n-Viveros and
                 S. Aparicio and M. G. Hern{\'a}ndez",
  title =        "Optimized {OpenCL} implementation of the
                 {Elastodynamic Finite Integration Technique} for
                 viscoelastic media",
  journal =      j-COMP-PHYS-COMM,
  volume =       "185",
  number =       "10",
  pages =        "2683--2696",
  month =        oct,
  year =         "2014",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Sat Aug 16 08:37:41 MDT 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465514001702",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Morishima:2014:PEG,
  author =       "Shin Morishima and Hiroki Matsutani",
  title =        "Performance Evaluations of Graph Database using {CUDA}
                 and {OpenMP} Compatible Libraries",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "75--80",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693728",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Dec 3 16:18:50 MST 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Graph databases use graph structures to store data
                 sets as nodes, edges, and properties. They are used to
                 store and search the relationships between a large
                 number of nodes, such as social networking services and
                 recommendation engines that use customer social graphs.
                 Since computation cost for graph search queries
                 increases as the graph becomes large, in this paper we
                 accelerate the graph search functions (Dijkstra and A*
                 algorithms) of a graph database Neo4j using two ways:
                 multithreaded library and CUDA library for graphics
                 processing units (GPUs). We use 100,000-node graphs
                 generated based on a degree distribution of Facebook
                 social graph for evaluations. Our multi-threaded and
                 GPU-based implementations require an auxiliary
                 adjacency matrix for a target graph. The results show
                 that, when we do not take into account additional
                 overhead to generate the auxiliary adjacency matrix,
                 multi-threaded version improves the Dijkstra and A*
                 search performance by 16.2x and 13.8x compared to the
                 original implementation. The GPU-based implementation
                 improves the Dijkstra and A* search performance by
                 26.2x and 32.8x. When we take into account the
                 overhead, although the speed-ups by our implementations
                 are reduced, by reusing the auxiliary adjacency matrix
                 for multiple graph search queries we can significantly
                 improve the graph search performance.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Nomura:2014:PAM,
  author =       "Shimpei Nomura and Takuji Mitsuishi and Jun Suzuki and
                 Yuki Hayashi and Masaki Kan and Hideharu Amano",
  title =        "Performance Analysis of the Multi-{GPU} System with
                 {ExpEther}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "42",
  number =       "4",
  pages =        "9--14",
  month =        sep,
  year =         "2014",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2693714.2693717",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:35 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "A GPU cluster in which each node provides a few GPUs
                 connected with PCIe (PCI Express) is commonly used for
                 acceleration of a large application program requiring
                 the performance beyond a single GPU. However, in such a
                 system, programmers are required to describe two
                 parallel programming between nodes in MPIs or other
                 message passing library as well as the fine grained
                 parallel programming for intra-GPUs. As a cost
                 effective alternative of such clusters, we propose a
                 novel multi-GPU system with ExpEther, a virtualization
                 technique which extends PCIe of a host CPU to Ethernet.
                 All devices connected by ExpEther can be treated as if
                 they were directly connected to the host. Evaluation
                 with two application programs with and without GPU-GPU
                 communication revealed that the proposed system with
                 four GPUs achieved 3.88 and 3.29 times performance
                 improvement respectively compared with a single GPU
                 system. Compared with GPU cluster system in which each
                 node provides a GPU, the proposed system achieved about
                 7\% and 30\% performance improvement, respectively.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "HEART '14 conference proceedings.",
}
@Article{Olukotun:2014:BPP,
  author =       "Kunle Olukotun",
  title =        "Beyond parallel programming with domain specific
                 languages",
  journal =      j-SIGPLAN,
  volume =       "49",
  number =       "8",
  pages =        "179--180",
  month =        aug,
  year =         "2014",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2692916.2557966",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Nov 26 16:26:30 MST 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Today, almost all computer architectures are parallel
                 and heterogeneous; a combination of multiple CPUs, GPUs
                 and specialized processors. This creates a challenging
                 problem for application developers who want to develop
                 high performance programs without the effort required
                 to use low-level, architecture specific parallel
                 programming models (e.g., OpenMP for CMPs, CUDA for
                 GPUs, MPI for clusters). Domain-specific languages
                 (DSLs) are a promising solution to this problem because
                 they can provide an avenue for high-level
                 application-specific abstractions with implicit
                 parallelism to be mapped directly to low level
                 architecture-specific programming models; providing
                 both high programmer productivity and high execution
                 performance. In this talk I will describe an approach
                 to building high performance DSLs, which is based on
                 DSL embedding in a general purpose programming
                 language, metaprogramming and a DSL infrastructure
                 called Delite. I will describe how we transform DSL
                 programs into efficient first-order low-level code
                 using domain specific optimization, parallelism and
                 locality optimization with parallel patterns, and
                 architecture-specific code generation. All
                 optimizations and transformations are implemented in
                 Delite: an extensible DSL compiler infrastructure that
                 significantly reduces the effort required to develop
                 new DSLs. Delite DSLs for machine learning, data
                 querying, graph analysis, and scientific computing all
                 achieve performance competitive with manually
                 parallelized C++ code.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '14 conference proceedings.",
}
@article{Pal:2014:PMH,
  author = {Anirban Pal and Abhishek Agarwala and Soumyendu Raha
    and Baidurya Bhattacharya},
  title = {Performance metrics in a hybrid {MPI--OpenMP} based
    molecular dynamics simulation with short-range
    interactions},
  journal = j-J-PAR-DIST-COMP,
  volume = 74,
  number = 3,
  pages = {2203--2214},
  month = mar,
  year = 2014,
  coden = {JPDCER},
  issn = {0743-7315 (print), 1096-0848 (electronic)},
  issn-l = {0743-7315},
  bibdate = {Tue Jan 28 12:39:53 MST 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0743731513002505},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/07437315/},
}
@Article{Panda:2014:GAM,
author = "Dhabaleswar K. Panda",
title = "{GPU}-Aware {MPI} on {RDMA}-Enabled Clusters: Design,
Implementation and Evaluation",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "25",
number = "10",
pages = "2595--2605",
month = oct,
year = "2014",
CODEN = "ITDSEO",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Thu Feb 12 13:58:32 MST 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.computer.org/csdl/trans/td/2014/10/06587715-abs.html",
abstract-URL = "http://www.computer.org/csdl/trans/td/2014/10/06587715-abs.html",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@article{Pawliczek:2014:VED,
  author = {Piotr Pawliczek and Witold Dzwinel and David A. Yuen},
  title = {Visual exploration of data by using multidimensional
    scaling on multicore {CPU}, {GPU}, and {MPI} cluster},
  journal = j-CCPE,
  volume = 26,
  number = 3,
  pages = {662--682},
  day = 10,
  month = mar,
  year = 2014,
  coden = {CCPEBO},
  doi = {https://doi.org/10.1002/cpe.3027},
  issn = {1532-0626 (print), 1532-0634 (electronic)},
  issn-l = {1532-0626},
  bibdate = {Thu Feb 27 14:51:21 MST 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Practice and Experience},
  journal-url = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate = {30 Apr 2013},
}
@article{Pena:2014:CEC,
  author = {Antonio J. Pe{\~n}a and Carlos Rea{\~n}o and Federico
    Silla and Rafael Mayo and Enrique S. Quintana-Ort{\'\i}
    and Jos{\'e} Duato},
  title = {A complete and efficient {CUDA}-sharing solution for
    {HPC} clusters},
  journal = j-PARALLEL-COMPUTING,
  volume = 40,
  number = 10,
  pages = {574--588},
  month = dec,
  year = 2014,
  coden = {PACOEJ},
  issn = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l = {0167-8191},
  bibdate = {Mon Nov 24 12:48:48 MST 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0167819114001227},
  acknowledgement = ack-nhfb,
  fjournal = {Parallel Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/01678191/},
}
@article{Peng:2014:BAH,
  author = {Yuanxi Peng and Manuel Salda{\~n}a and Christopher A.
    Madill and Xiaofeng Zou and Paul Chow},
  title = {Benefits of Adding Hardware Support for Broadcast and
    Reduce Operations in {MPSoC} Applications},
  journal = j-TRETS,
  volume = 7,
  number = 3,
  pages = {17:1--17:??},
  month = aug,
  year = 2014,
  coden = {????},
  doi = {https://doi.org/10.1145/2629470},
  issn = {1936-7406 (print), 1936-7414 (electronic)},
  issn-l = {1936-7406},
  bibdate = {Mon Sep 1 10:42:23 MDT 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/trets.bib},
  abstract = {MPI has been used as a parallel programming model for
    supercomputers and clusters and recently in
    MultiProcessor Systems-on-Chip (MPSoC). One component
    of MPI is collective communication and its performance
    is key for certain parallel applications to achieve
    good speedups. Previous work showed that, with
    synthetic communication-only benchmarks, communication
    improvements of up to 11.4-fold and 22-fold for
    broadcast and reduce operations, respectively, can be
    achieved by providing hardware support at the network
    level in a Network-on-Chip (NoC). However, these
    numbers do not provide a good estimation of the
    advantage for actual applications, as there are other
    factors that affect performance besides communications,
    such as computation. To this end, we extend our
    previous work by evaluating the impact of hardware
    support over a set of five parallel application kernels
    of varying computation-to-communication ratios. By
    introducing some useful computation to the performance
    evaluation, we obtain more representative results of
    the benefits of adding hardware support for broadcast
    and reduce operations. The experiments show that
    applications with lower computation-to-communication
    ratios benefit the most from hardware support as they
    highly depend on efficient collective communications to
    achieve better scalability. We also extend our work by
    doing more analysis on clock frequency, resource usage,
    power, and energy. The results show reasonable
    scalability for resource utilization and power in the
    network interfaces as the number of channels increases
    and that, even though more power is dissipated in the
    network interfaces due to the added hardware, the total
    energy used can still be less if the actual speedup is
    sufficient. The application kernels are executed in a
    24-embedded-processor system distributed across four
    FPGAs.},
  acknowledgement = ack-nhfb,
  articleno = 17,
  fjournal = {ACM Transactions on Reconfigurable Technology and
    Systems (TRETS)},
  journal-url = {http://portal.acm.org/toc.cfm?id=J1151},
}
@article{Peng:2014:IDI,
  author = {Yi Peng and Li Chen and Jun-Hai Yong},
  title = {Importance-Driven Isosurface Decimation for
    Visualization of Large Simulation Data Based on
    {OpenCL}},
  journal = j-COMPUT-SCI-ENG,
  volume = 16,
  number = 1,
  pages = {24--32},
  month = jan # "\slash " # feb,
  year = 2014,
  coden = {CSENFA},
  doi = {https://doi.org/10.1109/MCSE.2013.45},
  issn = {1521-9615},
  issn-l = {1521-9615},
  bibdate = {Sat Apr 19 10:17:39 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Computing in Science and Engineering},
  journal-url = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992},
}
@article{Priimak:2014:FDN,
  author = {Dmitri Priimak},
  title = {Finite difference numerical method for the
    superlattice {Boltzmann} transport equation and case
    comparison of {CPU(C)} and {GPU(CUDA)}
    implementations},
  journal = j-J-COMPUT-PHYS,
  volume = 278,
  number = {??},
  pages = {182--192},
  day = 1,
  month = dec,
  year = 2014,
  coden = {JCTPAH},
  doi = {https://doi.org/10.1016/j.jcp.2014.08.028},
  issn = {0021-9991 (print), 1090-2716 (electronic)},
  issn-l = {0021-9991},
  bibdate = {Tue Sep 23 17:27:17 MDT 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/jcomputphys2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0021999114005828},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Computational Physics},
  journal-url = {http://www.sciencedirect.com/science/journal/00219991/},
}
@article{Rodrigues:2014:TPS,
  author = {Christopher Rodrigues and Thomas Jablin and Abdul
    Dakkak and Wen-Mei Hwu},
  title = {{Triolet}: a programming system that unifies
    algorithmic skeleton interfaces for high-performance
    cluster computing},
  journal = j-SIGPLAN,
  volume = 49,
  number = 8,
  pages = {247--258},
  month = aug,
  year = 2014,
  coden = {SINODQ},
  doi = {https://doi.org/10.1145/2692916.2555268},
  issn = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  issn-l = {0362-1340},
  bibdate = {Wed Nov 26 16:26:30 MST 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract = {Functional algorithmic skeletons promise a high-level
    programming interface for distributed-memory clusters
    that free developers from concerns of task
    decomposition, scheduling, and communication.
    Unfortunately, prior distributed functional skeleton
    frameworks do not deliver performance comparable to
    that achievable in a low-level distributed programming
    model such as C with MPI and OpenMP, even when used in
    concert with high-performance array libraries. There
    are several causes: they do not take advantage of
    shared memory on each cluster node; they impose a fixed
    partitioning strategy on input data; and they have
    limited ability to fuse loops involving skeletons that
    produce a variable number of outputs per input. We
    address these shortcomings in the Triolet programming
    language through a modular library design that
    separates concerns of parallelism, loop nesting, and
    data partitioning. We show how Triolet substantially
    improves the parallel performance of algorithms
    involving array traversals and nested, variable-size
    loops over what is achievable in Eden, a distributed
    variant of Haskell. We further demonstrate how Triolet
    can substantially simplify parallel programming
    relative to C with MPI and OpenMP while achieving
    23--100\% of its performance on a 128-core cluster.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGPLAN Notices},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark = {PPoPP '14 conference proceedings.},
}
@Article{Saillard:2014:PCS,
author = "Emmanuelle Saillard and Patrick Carribault and Denis
Barthou",
title = "{PARCOACH}: Combining static and dynamic validation of
{MPI} collective communications",
journal = j-IJHPCA,
volume = "28",
number = "4",
pages = "425--434",
month = nov,
year = "2014",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342014552204",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Feb 13 09:17:23 MST 2015",
bibsource = "http://hpc.sagepub.com/content/28/4.toc;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/28/4/425",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "https://journals.sagepub.com/home/hpc",
onlinedate = "September 26, 2014",
}
@article{Samadi:2014:LGU,
  author = {Mehrzad Samadi and Amir Hormati and Janghaeng Lee and
    Scott Mahlke},
  title = {Leveraging {GPUs} using cooperative loop speculation},
  journal = j-TACO,
  volume = 11,
  number = 1,
  pages = {3:1--3:??},
  month = feb,
  year = 2014,
  coden = {????},
  doi = {https://doi.org/10.1145/2579617},
  issn = {1544-3566 (print), 1544-3973 (electronic)},
  issn-l = {1544-3566},
  bibdate = {Fri Mar 14 17:30:52 MDT 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/taco.bib},
  abstract = {Graphics processing units, or GPUs, provide TFLOPs of
    additional performance potential in commodity computer
    systems that frequently go unused by most applications.
    Even with the emergence of languages such as CUDA and
    OpenCL, programming GPUs remains a difficult challenge
    for a variety of reasons, including the inherent
    algorithmic characteristics and data structure choices
    used by applications as well as the tedious performance
    optimization cycle that is necessary to achieve high
    performance. The goal of this work is to increase the
    applicability of GPUs beyond CUDA/OpenCL to implicitly
    data-parallel applications written in C/C++ using
    speculative parallelization. To achieve this goal, we
    propose Paragon: a static/dynamic compiler platform to
    speculatively run possibly data-parallel portions of
    sequential applications on the GPU while cooperating
    with the system CPU. For such loops, Paragon utilizes
    the GPU in an opportunistic way while orchestrating a
    cooperative relation between the CPU and GPU to reduce
    the overhead of miss-speculations. Paragon monitors the
    dependencies for the loops running speculatively on the
    GPU and nonspeculatively on the CPU using a lightweight
    distributed conflict detection designed specifically
    for GPUs, and transfers the execution to the CPU in
    case a conflict is detected. Paragon resumes the
    execution on the GPU after the CPU resolves the
    dependency. Our experiments show that Paragon achieves
    4x on average and up to 30x speedup compared to unsafe
    CPU execution with four threads and 7x on average and
    up to 64x speedup versus sequential execution across a
    set of sequential but implicitly data-parallel
    applications.},
  acknowledgement = ack-nhfb,
  articleno = 3,
  fjournal = {ACM Transactions on Architecture and Code Optimization
    (TACO)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J924},
}
@article{Samadi:2014:PPB,
  author = {Mehrzad Samadi and Davoud Anoushe Jamshidi and
    Janghaeng Lee and Scott Mahlke},
  title = {{Paraprox}: pattern-based approximation for data
    parallel applications},
  journal = j-COMP-ARCH-NEWS,
  volume = 42,
  number = 1,
  pages = {35--50},
  month = mar,
  year = 2014,
  coden = {CANED2},
  doi = {https://doi.org/10.1145/2654822.2541948},
  issn = {0163-5964 (print), 1943-5851 (electronic)},
  issn-l = {0163-5964},
  bibdate = {Mon Aug 18 17:12:47 MDT 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract = {Approximate computing is an approach where reduced
    accuracy of results is traded off for increased speed,
    throughput, or both. Loss of accuracy is not
    permissible in all computing domains, but there are a
    growing number of data-intensive domains where the
    output of programs need not be perfectly correct to
    provide useful results or even noticeable differences
    to the end user. These soft domains include multimedia
    processing, machine learning, and data mining/analysis.
    An important challenge with approximate computing is
    transparency to insulate both software and hardware
    developers from the time, cost, and difficulty of using
    approximation. This paper proposes a software-only
    system, Paraprox, for realizing transparent
    approximation of data-parallel programs that operates
    on commodity hardware systems. Paraprox starts with a
    data-parallel kernel implemented using OpenCL or CUDA
    and creates a parameterized approximate kernel that is
    tuned at runtime to maximize performance subject to a
    target output quality (TOQ) that is supplied by the
    user. Approximate kernels are created by recognizing
    common computation idioms found in data-parallel
    programs (e.g., Map, Scatter/Gather, Reduction, Scan,
    Stencil, and Partition) and substituting approximate
    implementations in their place. Across a set of 13 soft
    data-parallel applications with at most 10\% quality
    degradation, Paraprox yields an average performance
    gain of 2.7x on a NVIDIA GTX 560 GPU and 2.5x on an
    Intel Core i7 quad-core processor compared to accurate
    execution on each platform.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J89},
  remark = {ASPLOS '14 conference proceedings.},
}
@article{Samadi:2014:SPS,
  author = {Mehrzad Samadi and Janghaeng Lee and D. Anoushe
    Jamshidi and Scott Mahlke and Amir Hormati},
  title = {Scaling Performance via Self-Tuning Approximation for
    Graphics Engines},
  journal = j-TOCS,
  volume = 32,
  number = 3,
  pages = {7:1--7:??},
  month = sep,
  year = 2014,
  coden = {ACSYEC},
  doi = {https://doi.org/10.1145/2631913},
  issn = {0734-2071 (print), 1557-7333 (electronic)},
  issn-l = {0734-2071},
  bibdate = {Wed Jan 21 07:18:28 MST 2015},
  bibsource = {http://www.acm.org/pubs/contents/journals/tocs/;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract = {Approximate computing, where computation accuracy is
    traded off for better performance or higher data
    throughput, is one solution that can help data
    processing keep pace with the current and growing
    abundance of information. For particular domains, such
    as multimedia and learning algorithms, approximation is
    commonly used today. We consider automation to be
    essential to provide transparent approximation, and we
    show that larger benefits can be achieved by
    constructing the approximation techniques to fit the
    underlying hardware. Our target platform is the GPU
    because of its high performance capabilities and
    difficult programming challenges that can be alleviated
    with proper automation. Our approach --- SAGE ---
    combines a static compiler that automatically generates
    a set of CUDA kernels with varying levels of
    approximation with a runtime system that iteratively
    selects among the available kernels to achieve speedup
    while adhering to a target output quality set by the
    user. The SAGE compiler employs three optimization
    techniques to generate approximate kernels that exploit
    the GPU microarchitecture: selective discarding of
    atomic operations, data packing, and thread fusion.
    Across a set of machine learning and image processing
    kernels, SAGE's approximation yields an average of 2.5$
    \times $ speedup with less than 10\% quality loss
    compared to the accurate execution on a NVIDIA GTX 560
    GPU.},
  acknowledgement = ack-nhfb,
  articleno = 7,
  fjournal = {ACM Transactions on Computer Systems},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J774},
}
@article{Sani:2014:PDF,
  author = {Ardalan Amiri Sani and Kevin Boos and Shaopu Qin and
    Lin Zhong},
  title = {{I/O} paravirtualization at the device file boundary},
  journal = j-COMP-ARCH-NEWS,
  volume = 42,
  number = 1,
  pages = {319--332},
  month = mar,
  year = 2014,
  coden = {CANED2},
  doi = {https://doi.org/10.1145/2654822.2541943},
  issn = {0163-5964 (print), 1943-5851 (electronic)},
  issn-l = {0163-5964},
  bibdate = {Mon Aug 18 17:12:47 MDT 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract = {Paravirtualization is an important I/O virtualization
    technology since it uniquely provides all of the
    following benefits: the ability to share the device
    between multiple VMs, support for legacy devices
    without virtualization hardware, and high performance.
    However, existing paravirtualization solutions have one
    main limitation: they only support one I/O device
    class, and would require significant engineering effort
    to support new device classes and features. In this
    paper, we present Paradice, a solution that vastly
    simplifies I/O paravirtualization by using a common
    paravirtualization boundary for various I/O device
    classes: Unix device files. Using this boundary, the
    paravirtual drivers simply act as a class-agnostic
    indirection layer between the application and the
    actual device driver. We address two fundamental
    challenges: supporting cross-VM driver memory
    operations without changes to applications or device
    drivers and providing fault and device data isolation
    between guest VMs despite device driver bugs. We
    implement Paradice for x86, the Xen hypervisor, and the
    Linux and FreeBSD OSes. Our implementation
    paravirtualizes various GPUs, input devices, cameras,
    an audio device, and an Ethernet card for the netmap
    framework with $ \approx 7700 $ LoC, of which only $
    \approx 900 $ are device class-specific. Our
    measurements show that Paradice achieves performance
    close to native for different devices and applications
    including netmap, 3D HD games, and OpenCL
    applications.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J89},
  remark = {ASPLOS '14 conference proceedings.},
}
@article{Song:2014:DAT,
  author = {Sukhyun Song and Jeffrey K. Hollingsworth},
  title = {Designing and auto-tuning parallel {$3$-D FFT} for
    computation-communication overlap},
  journal = j-SIGPLAN,
  volume = 49,
  number = 8,
  pages = {181--192},
  month = aug,
  year = 2014,
  coden = {SINODQ},
  doi = {https://doi.org/10.1145/2692916.2555249},
  issn = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  issn-l = {0362-1340},
  bibdate = {Wed Nov 26 16:26:30 MST 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract = {This paper presents a method to design and auto-tune a
    new parallel 3-D FFT code using the non-blocking MPI
    all-to-all operation. We achieve high performance by
    optimizing computation-communication overlap. Our code
    performs fully asynchronous communication without any
    support from special hardware. We also improve cache
    performance through loop tiling. To cope with the
    complex trade-off regarding our optimization
    techniques, we parameterize our code and auto-tune the
    parameters efficiently in a large parameter space.
    Experimental results from two systems confirm that our
    code achieves a speedup of up to 1.76x over the FFTW
    library.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGPLAN Notices},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark = {PPoPP '14 conference proceedings.},
}
@article{Steinberger:2014:WTB,
  author = {Markus Steinberger and Michael Kenzel and Pedro
    Boechat and Bernhard Kerbl and Mark Dokter and Dieter
    Schmalstieg},
  title = {{Whippletree}: task-based scheduling of dynamic
    workloads on the {GPU}},
  journal = j-TOG,
  volume = 33,
  number = 6,
  pages = {228:1--228:??},
  month = nov,
  year = 2014,
  coden = {ATGRDF},
  doi = {https://doi.org/10.1145/2661229.2661250},
  issn = {0730-0301 (print), 1557-7368 (electronic)},
  issn-l = {0730-0301},
  bibdate = {Fri Nov 14 19:16:26 MST 2014},
  bibsource = {http://www.acm.org/pubs/contents/journals/tog/;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/tog.bib},
  abstract = {In this paper, we present Whippletree, a novel
    approach to scheduling dynamic, irregular workloads on
    the GPU. We introduce a new programming model which
    offers the simplicity and expressiveness of task-based
    parallelism while retaining all aspects of the
    multi-level execution hierarchy essential to unlocking
    the full potential of a modern GPU. At the same time,
    our programming model lends itself to efficient
    implementation on the SIMD-based architecture typical
    of a current GPU. We demonstrate the practical utility
    of our model by providing a reference implementation on
    top of current CUDA hardware. Furthermore, we show that
    our model compares favorably to traditional approaches
    in terms of both performance as well as the range of
    applications that can be covered. We demonstrate the
    benefits of our model for recursive Reyes rendering,
    procedural geometry generation and volume rendering
    with concurrent irradiance caching.},
  acknowledgement = ack-nhfb,
  articleno = 228,
  fjournal = {ACM Transactions on Graphics},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J778},
}
@article{Steuwer:2014:SHL,
  author = {Michel Steuwer and Sergei Gorlatch},
  title = {{SkelCL}: a high-level extension of {OpenCL} for
    {multi-GPU} systems},
  journal = j-J-SUPERCOMPUTING,
  volume = 69,
  number = 1,
  pages = {25--33},
  month = jul,
  year = 2014,
  coden = {JOSUED},
  doi = {https://doi.org/10.1007/s11227-014-1213-y},
  issn = {0920-8542 (print), 1573-0484 (electronic)},
  issn-l = {0920-8542},
  bibdate = {Wed Sep 10 06:45:05 MDT 2014},
  bibsource = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=69&issue=1;
    http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://link.springer.com/article/10.1007/s11227-014-1213-y},
  acknowledgement = ack-nhfb,
  fjournal = {The Journal of Supercomputing},
  journal-url = {http://link.springer.com/journal/11227},
}
@Article{Symeonidou:2014:DRB,
author = "Christi Symeonidou and Polyvios Pratikakis and
Dimitrios S. Nikolopoulos and Angelos Bilas",
title = "Distributed region-based memory allocation and
synchronization",
journal = j-IJHPCA,
volume = "28",
number = "4",
pages = "406--414",
month = nov,
year = "2014",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342014552863",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Feb 13 09:17:23 MST 2015",
bibsource = "http://hpc.sagepub.com/content/28/4.toc;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/28/4/406",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "https://journals.sagepub.com/home/hpc",
}
@article{Teixido:2014:MBI,
  author = {Ivan Teixid{\'o} and Francesc Seb{\'e} and Josep Conde
    and Francesc Solsona},
  title = {{MPI}-based implementation of an enhanced algorithm to
    solve the {LPN} problem in a memory-constrained
    environment},
  journal = j-PARALLEL-COMPUTING,
  volume = 40,
  number = {5--6},
  pages = {100--112},
  month = may,
  year = 2014,
  coden = {PACOEJ},
  issn = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l = {0167-8191},
  bibdate = {Fri May 30 18:33:51 MDT 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0167819114000453},
  acknowledgement = ack-nhfb,
  fjournal = {Parallel Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/01678191/},
}
@article{Thompson:2014:CIC,
  author = {Elizabeth A. Thompson and Timothy R. Anderson},
  title = {A {CUDA} implementation of the {Continuous Space
    Language Model}},
  journal = j-J-SUPERCOMPUTING,
  volume = 68,
  number = 1,
  pages = {65--86},
  month = apr,
  year = 2014,
  coden = {JOSUED},
  doi = {https://doi.org/10.1007/s11227-013-1023-7},
  issn = {0920-8542 (print), 1573-0484 (electronic)},
  issn-l = {0920-8542},
  bibdate = {Wed Sep 10 06:44:53 MDT 2014},
  bibsource = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=68&issue=1;
    http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://link.springer.com/article/10.1007/s11227-013-1023-7},
  acknowledgement = ack-nhfb,
  fjournal = {The Journal of Supercomputing},
  journal-url = {http://link.springer.com/journal/11227},
}
@article{Tien:2014:EOS,
  author = {Tsan-Rong Tien and Yi-Ping You},
  title = {Enabling {OpenCL} support for {GPGPU} in Kernel-based
    Virtual Machine},
  journal = j-SPE,
  volume = 44,
  number = 5,
  pages = {483--510},
  month = may,
  year = 2014,
  coden = {SPEXBL},
  doi = {https://doi.org/10.1002/spe.2166},
  issn = {0038-0644 (print), 1097-024X (electronic)},
  issn-l = {0038-0644},
  bibdate = {Wed Sep 10 05:57:32 MDT 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/spe.bib;
    http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Software --- Practice and Experience},
  journal-url = {http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1097-024X},
  onlinedate = {22 Nov 2012},
}
@article{Traff:2014:SPE,
  author = {Jesper Larsson Tr{\"a}ff and Siegfried Benkner},
  title = {Selected Papers from {EuroMPI 2012}},
  journal = j-COMPUTING,
  volume = 96,
  number = 4,
  pages = {259--261},
  month = apr,
  year = 2014,
  coden = {CMPTA2},
  doi = {https://doi.org/10.1007/s00607-013-0335-z},
  issn = {0010-485X (print), 1436-5057 (electronic)},
  issn-l = {0010-485X},
  bibdate = {Fri Jun 6 10:07:21 MDT 2014},
  bibsource = {http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0010-485X&volume=96&issue=4;
    http://www.math.utah.edu/pub/tex/bib/computing.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://link.springer.com/article/10.1007/s00607-013-0335-z},
  acknowledgement = ack-nhfb,
  fjournal = {Computing},
  journal-url = {http://link.springer.com/journal/607},
}
@article{Vikas:2014:MGA,
  author = {Vikas and Nasser Giacaman and Oliver Sinnen},
  title = {Multiprocessing with {GUI}-awareness using
    {OpenMP}-like directives in {Java}},
  journal = j-PARALLEL-COMPUTING,
  volume = 40,
  number = 2,
  pages = {69--89},
  month = feb,
  year = 2014,
  coden = {PACOEJ},
  issn = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l = {0167-8191},
  bibdate = {Fri Feb 28 06:47:16 MST 2014},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/java2010.bib;
    http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0167819113001439},
  acknowledgement = ack-nhfb,
  fjournal = {Parallel Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/01678191/},
}
@Article{Wang:2014:IPD,
author = "Zheng Wang and Georgios Tournavitis and Bj{\"o}rn
Franke and Michael F. P. O'Boyle",
title = "Integrating profile-driven parallelism detection and
machine-learning-based mapping",
journal = j-TACO,
volume = "11",
number = "1",
pages = "2:1--2:??",
month = feb,
year = "2014",
CODEN = "????",
DOI = "https://doi.org/10.1145/2579561",
ISSN = "1544-3566 (print), 1544-3973 (electronic)",
ISSN-L = "1544-3566",
bibdate = "Fri Mar 14 17:30:52 MDT 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/taco.bib",
abstract = "Compiler-based auto-parallelization is a much-studied
area but has yet to find widespread application. This
is largely due to the poor identification and
exploitation of application parallelism, resulting in
disappointing performance far below that which a
skilled expert programmer could achieve. We have
identified two weaknesses in traditional parallelizing
compilers and propose a novel, integrated approach
resulting in significant performance improvements of
the generated parallel code. Using profile-driven
parallelism detection, we overcome the limitations of
static analysis, enabling the identification of more
application parallelism, and only rely on the user for
final approval. We then replace the traditional
target-specific and inflexible mapping heuristics with
a machine-learning-based prediction mechanism,
resulting in better mapping decisions while automating
adaptation to different target architectures. We have
evaluated our parallelization strategy on the NAS and
SPEC CPU2000 benchmarks and two different multicore
platforms (dual quad-core Intel Xeon SMP and
dual-socket QS20 Cell blade). We demonstrate that our
approach not only yields significant improvements when
compared with state-of-the-art parallelizing compilers
but also comes close to and sometimes exceeds the
performance of manually parallelized codes. On average,
our methodology achieves 96\% of the performance of the
hand-tuned OpenMP NAS and SPEC parallel benchmarks on
the Intel Xeon platform and gains a significant speedup
for the IBM Cell platform, demonstrating the potential
of profile-guided and machine-learning-based
parallelization for complex multicore platforms.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Architecture and Code Optimization
(TACO)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@article{Wu:2014:MAG,
  author = {Xing Wu and Frank Mueller and Scott Pakin},
  title = {A methodology for automatic generation of executable
    communication specifications from parallel {MPI}
    applications},
  journal = j-TOPC,
  volume = 1,
  number = 1,
  pages = {6:1--6:??},
  month = sep,
  year = 2014,
  coden = {????},
  doi = {https://doi.org/10.1145/2660249},
  issn = {2329-4949 (print), 2329-4957 (electronic)},
  issn-l = {2329-4949},
  bibdate = {Fri Oct 17 12:28:03 MDT 2014},
  bibsource = {http://topc.acm.org/;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/topc.bib},
  abstract = {Portable parallel benchmarks are widely used for
    performance evaluation of HPC systems. However, because
    these are manually produced, they generally represent a
    greatly simplified view of application behavior,
    missing the subtle but important-to-performance nuances
    that may exist in a complete application. This work
    contributes novel methods to automatically generate
    highly portable and customizable communication
    benchmarks from HPC applications. We utilize
    ScalaTrace, a lossless yet scalable
    parallel-application tracing framework to collect
    selected aspects of the run-time behavior of HPC
    applications, including communication operations and
    computation time, while abstracting away the details of
    the computation proper. We subsequently generate
    benchmarks with nearly identical run-time behavior to
    the original applications. Results demonstrate that the
    generated benchmarks are in fact able to preserve the
    run-time behavior (including both the communication
    pattern and the execution time) of the original
    applications. Such automated benchmark generation is
    without precedent and particularly valuable for
    proprietary, export-controlled, or classified
    application codes.},
  acknowledgement = ack-nhfb,
  articleno = 6,
  fjournal = {ACM Transactions on Parallel Computing},
  journal-url = {http://dl.acm.org/citation.cfm?id=2632163},
}
@Article{Wu:2014:OFB,
author = "Jing Wu and Joseph JaJa and Elias Balaras",
title = "An Optimized {FFT}-Based Direct {Poisson} Solver on
{CUDA GPUs}",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "25",
number = "3",
pages = "550--559",
month = mar,
year = "2014",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2013.53",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Mon Aug 25 07:12:16 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@Article{Yan:2014:OMB,
author = "Xin Yan and Xiaohua Shi and Lina Wang and Haiyan
Yang",
title = "An {OpenCL} micro-benchmark suite for {GPUs} and
{CPUs}",
journal = j-J-SUPERCOMPUTING,
volume = "69",
number = "2",
pages = "693--713",
month = aug,
year = "2014",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-014-1112-2",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Wed Sep 10 06:45:09 MDT 2014",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=69&issue=2;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s11227-014-1112-2",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Yang:2014:CNR,
author = "Yi Yang and Huiyang Zhou",
title = "{CUDA-NP}: realizing nested thread-level parallelism
in {GPGPU} applications",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "93--106",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555254",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Parallel programs consist of series of code sections
with different thread-level parallelism (TLP). As a
result, it is rather common that a thread in a parallel
program, such as a GPU kernel in CUDA programs, still
contains both sequential code and parallel loops. In
order to leverage such parallel loops, the latest
Nvidia Kepler architecture introduces dynamic
parallelism, which allows a GPU thread to start another
GPU kernel, thereby reducing the overhead of launching
kernels from a CPU. However, with dynamic parallelism,
a parent thread can only communicate with its child
threads through global memory and the overhead of
launching GPU kernels is non-trivial even within GPUs.
In this paper, we first study a set of GPGPU benchmarks
that contain parallel loops, and highlight that these
benchmarks do not have a very high loop count or high
degrees of TLP. Consequently, the benefits of
leveraging such parallel loops using dynamic
parallelism are too limited to offset its overhead. We
then present our proposed solution to exploit nested
parallelism in CUDA, referred to as CUDA-NP. With
CUDA-NP, we initially enable a high number of threads
when a GPU program starts, and use control flow to
activate different numbers of threads for different
code sections. We implemented our proposed CUDA-NP
framework using a directive-based compiler approach.
For a GPU kernel, an application developer only needs
to add OpenMP-like pragmas for parallelizable code
sections. Then, our CUDA-NP compiler automatically
generates the optimized GPU kernels. It supports both
the reduction and the scan primitives, explores
different ways to distribute parallel loop iterations
into threads, and efficiently manages on-chip resource.
Our experiments show that for a set of GPGPU
benchmarks, which have already been optimized and
contain nested parallelism, our proposed CUDA-NP
framework further improves the performance by up to
6.69 times and 2.18 times on average.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Yang:2014:HPD,
author = "Luobin Yang and Steve C. Chiu and Wei-Keng Liao",
title = "High performance data clustering: a comparative
analysis of performance for {GPU}, {RASC}, {MPI}, and
{OpenMP} implementations",
journal = j-J-SUPERCOMPUTING,
volume = "70",
number = "1",
pages = "284--300",
month = oct,
year = "2014",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-013-0906-y",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Fri Feb 13 12:32:14 MST 2015",
bibsource = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0920-8542&volume=70&issue=1;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s11227-013-0906-y",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Yang:2014:IMP,
author = "Xu Yang and Deyuan Guo and Hu He and Haijing Tang and
Yanjun Zhang",
title = "An Implementation of {Message-Passing Interface} over
{VxWorks} for Real-Time Embedded Multi-Core Systems",
journal = j-COMP-J,
volume = "57",
number = "11",
pages = "1756--1764",
month = nov,
year = "2014",
CODEN = "CMPJA6",
DOI = "https://doi.org/10.1093/comjnl/bxt152",
ISSN = "0010-4620 (print), 1460-2067 (electronic)",
ISSN-L = "0010-4620",
bibdate = "Mon Oct 27 08:54:43 MDT 2014",
bibsource = "http://comjnl.oxfordjournals.org/content/57/11.toc;
http://www.math.utah.edu/pub/tex/bib/compj2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://comjnl.oxfordjournals.org/content/57/11/1756",
acknowledgement = ack-nhfb,
fjournal = "Computer Journal",
journal-URL = "http://comjnl.oxfordjournals.org/",
onlinedate = "January 3, 2014",
}
@Article{Yang:2014:PMI,
author = "Chaoran Yang and Wesley Bland and John Mellor-Crummey
and Pavan Balaji",
title = "Portable, {MPI}-interoperable {Coarray Fortran}",
journal = j-SIGPLAN,
volume = "49",
number = "8",
pages = "81--92",
month = aug,
year = "2014",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2692916.2555270",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Nov 26 16:26:30 MST 2014",
bibsource = "http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "The past decade has seen the advent of a number of
parallel programming models such as Coarray Fortran
(CAF), Unified Parallel C, X10, and Chapel. Despite the
productivity gains promised by these models, most
parallel scientific applications still rely on MPI as
their data movement model. One reason for this trend is
that it is hard for users to incrementally adopt these
new programming models in existing MPI applications.
Because each model uses its own runtime system, they
duplicate resources and are potentially error-prone.
Such independent runtime systems were deemed necessary
because MPI was considered insufficient in the past to
play this role for these languages. The recently
released MPI-3, however, adds several new capabilities
that now provide all of the functionality needed to act
as a runtime, including a much more comprehensive
one-sided communication framework. In this paper, we
investigate how MPI-3 can form a runtime system for one
example programming model, CAF, with a broader goal of
enabling a single application to use both MPI and CAF
with the highest level of interoperability.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PPoPP '14 conference proceedings.",
}
@Article{Zheng:2014:IMS,
author = "Liang Zheng and Huai Zhang and Taras Gerya and Matthew
Knepley and David A. Yuen and Yaolin Shi",
title = "Implementation of a multigrid solver on a {GPU} for
{Stokes} equations with strongly variable viscosity
based on {Matlab} and {CUDA}",
journal = j-IJHPCA,
volume = "28",
number = "1",
pages = "50--60",
month = feb,
year = "2014",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342013478640",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Mar 14 15:39:59 MDT 2014",
bibsource = "http://hpc.sagepub.com/content/28/1.toc;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/matlab.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/28/1/50.full.pdf+html",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
onlinedate = "March 5, 2013",
}
@Article{Zounmevo:2014:ESC,
author = "Judicael A. Zounmevo and Dries Kimpe and Robert Ross
and Ahmad Afsahi",
title = "Extreme-scale computing services over {MPI}:
Experiences, observations and features proposal for
next-generation message passing interface",
journal = j-IJHPCA,
volume = "28",
number = "4",
pages = "435--449",
month = nov,
year = "2014",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342014548864",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Fri Feb 13 09:17:23 MST 2015",
bibsource = "http://hpc.sagepub.com/content/28/4.toc;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://hpc.sagepub.com/content/28/4/435",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
onlinedate = "September 10, 2014",
}
@Article{Zounmevo:2014:FRC,
author = "Judicael A. Zounmevo and Ahmad Afsahi",
title = "A fast and resource-conscious {MPI} message queue
mechanism for large-scale jobs",
journal = j-FUT-GEN-COMP-SYS,
volume = "30",
number = "??",
pages = "265--290",
month = jan,
year = "2014",
CODEN = "FGSEVI",
ISSN = "0167-739X (print), 1872-7115 (electronic)",
ISSN-L = "0167-739X",
bibdate = "Mon Dec 2 16:57:46 MST 2013",
bibsource = "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.sciencedirect.com/science/journal/0167739X",
URL = "http://www.sciencedirect.com/science/article/pii/S0167739X13001489",
acknowledgement = ack-nhfb,
fjournal = "Future Generation Computer Systems",
journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Agosta:2015:OPP,
author = "Giovanni Agosta and Alessandro Barenghi and Alessandro
{Di Federico} and Gerardo Pelosi",
title = "{OpenCL} performance portability for general-purpose
computation on graphics processor units: an exploration
on cryptographic primitives",
journal = j-CCPE,
volume = "27",
number = "14",
pages = "3633--3660",
day = "25",
month = sep,
year = "2015",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.3358",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Mon Sep 28 09:32:54 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "29 Aug 2014",
}
@Article{Al-Mouhamed:2015:EAO,
author = "Mayez Al-Mouhamed and Ayaz ul Hassan Khan",
title = "Exploration of automatic optimisation for {CUDA}
programming",
journal = j-INT-J-PAR-EMER-DIST-SYS,
volume = "30",
number = "4",
pages = "309--324",
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1080/17445760.2014.953158",
ISSN = "1744-5760 (print), 1744-5779 (electronic)",
ISSN-L = "1744-5760",
bibdate = "Tue Sep 15 07:34:54 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/intjparemerdistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.tandfonline.com/toc/gpaa20/30/4",
URL = "http://www.tandfonline.com/doi/abs/10.1080/17445760.2014.953158",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel, Emergent and
Distributed Systems: IJPEDS",
journal-URL = "http://www.tandfonline.com/loi/gpaa20",
}
@Article{Amer:2015:MRC,
author = "Abdelhalim Amer and Huiwei Lu and Yanjie Wei and Pavan
Balaji and Satoshi Matsuoka",
title = "{MPI+Threads}: runtime contention and remedies",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "239--248",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688522",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Hybrid MPI+Threads programming has emerged as an
alternative model to the ``MPI everywhere'' model to
better handle the increasing core density in cluster
nodes. While the MPI standard allows multithreaded
concurrent communication, such flexibility comes with
the cost of maintaining thread safety within the MPI
implementation, typically implemented using critical
sections. In contrast to previous works that studied
the importance of critical-section granularity in MPI
implementations, in this paper we investigate the
implication of critical-section arbitration on
communication performance. We first analyze the MPI
runtime when multithreaded concurrent communication
takes place on hierarchical memory systems. Our results
indicate that the mutex-based approach that most MPI
implementations use today can incur performance
penalties due to unfair arbitration. We then present
methods to mitigate these penalties with a first-come,
first-served arbitration and a priority locking scheme
that favors threads doing useful work. Through
evaluations using several benchmarks and applications,
we demonstrate up to 5-fold improvement in
performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Balasubramanian:2015:EGL,
author = "Raghuraman Balasubramanian and Vinay Gangadhar and
Ziliang Guo and Chen-Han Ho and Cherin Joseph and
Jaikrishnan Menon and Mario Paulo Drumond and Robin
Paul and Sharath Prasad and Pradip Valathol and
Karthikeyan Sankaralingam",
title = "Enabling {GPGPU} Low-Level Hardware Explorations with
{MIAOW}: an Open-Source {RTL} Implementation of a
{GPGPU}",
journal = j-TACO,
volume = "12",
number = "2",
pages = "21:1--21:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2764908",
ISSN = "1544-3566 (print), 1544-3973 (electronic)",
ISSN-L = "1544-3566",
bibdate = "Fri Aug 7 09:46:00 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/taco.bib",
abstract = "Graphic processing unit (GPU)-based general-purpose
computing is developing as a viable alternative to
CPU-based computing in many domains. Today's tools for
GPU analysis include simulators like GPGPU-Sim,
Multi2Sim, and Barra. While useful for modeling
first-order effects, these tools do not provide a
detailed view of GPU microarchitecture and physical
design. Further, as GPGPU research evolves, design
ideas and modifications demand detailed estimates of
impact on overall area and power. Fueled by this need,
we introduce MIAOW (Many-core Integrated Accelerator Of
Wisconsin), an open-source RTL implementation of the
AMD Southern Islands GPGPU ISA, capable of running
unmodified OpenCL-based applications. We present our
design motivated by our goals to create a realistic,
flexible, OpenCL-compatible GPGPU, capable of emulating
a full system. We first explore if MIAOW is realistic
and then use four case studies to show that MIAOW
enables the following: physical design perspective to
``traditional'' microarchitecture, new types of
research exploration, and validation/calibration of
simulator-based characterization of hardware. The
findings and ideas are contributions in their own
right, in addition to MIAOW's utility as a tool for
others' research.",
acknowledgement = ack-nhfb,
articleno = "21",
fjournal = "ACM Transactions on Architecture and Code Optimization
(TACO)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Betts:2015:DIV,
author = "Adam Betts and Nathan Chong and Alastair F. Donaldson
and Jeroen Ketema and Shaz Qadeer and Paul Thomson and
John Wickerson",
title = "The Design and Implementation of a Verification
Technique for {GPU} Kernels",
journal = j-TOPLAS,
volume = "37",
number = "3",
pages = "10:1--10:??",
month = jun,
year = "2015",
CODEN = "ATPSDT",
DOI = "https://doi.org/10.1145/2743017",
ISSN = "0164-0925 (print), 1558-4593 (electronic)",
ISSN-L = "0164-0925",
bibdate = "Fri Jun 19 05:36:55 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/toplas/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/toplas.bib",
abstract = "We present a technique for the formal verification of
GPU kernels, addressing two classes of correctness
properties: data races and barrier divergence. Our
approach is founded on a novel formal operational
semantics for GPU kernels termed {\em synchronous,
delayed visibility (SDV)\/} semantics, which captures
the execution of a GPU kernel by multiple groups of
threads. The SDV semantics provides operational
definitions for barrier divergence and for both inter-
and intra-group data races. We build on the semantics
to develop a method for reducing the task of verifying
a massively parallel GPU kernel to that of verifying a
sequential program. This completely avoids the need to
reason about thread interleavings, and allows existing
techniques for sequential program verification to be
leveraged. We describe an efficient encoding of data
race detection and propose a method for automatically
inferring the loop invariants that are required for
verification. We have implemented these techniques as a
practical verification tool, GPUVerify, that can be
applied directly to OpenCL and CUDA source code. We
evaluate GPUVerify with respect to a set of 162 kernels
drawn from public and commercial sources. Our
evaluation demonstrates that GPUVerify is capable of
efficient, automatic verification of a large number of
real-world kernels.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Programming Languages and
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J783",
}
@Article{Bukata:2015:SRC,
author = "Libor Bukata and P{\v{r}}emysl {\v{S}}{\r{u}}cha and
Zden{\v{e}}k Hanz{\'a}lek",
title = "Solving the Resource Constrained Project Scheduling
Problem using the parallel Tabu Search designed for the
{CUDA} platform",
journal = j-J-PAR-DIST-COMP,
volume = "77",
number = "??",
pages = "58--68",
month = mar,
year = "2015",
CODEN = "JPDCER",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Mon Mar 2 12:05:20 MST 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0743731514002226",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315/",
}
@Article{Busa:2015:CCO,
author = "J{\'a}n {Busa, Jr.} and J{\'a}n Busa and Shura Hayryan
and Chin-Kun Hu and Ming-Chya Wu",
title = "{CAVE-CL}: an {OpenCL} version of the package for
detection and quantitative analysis of internal
cavities in a system of overlapping balls: Application
to proteins",
journal = j-COMP-PHYS-COMM,
volume = "190",
number = "??",
pages = "224--227",
month = may,
year = "2015",
CODEN = "CPHCBZ",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Wed Mar 4 08:31:43 MST 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465514004378",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Carretero:2015:AMM,
author = "Jesus Carretero and Javier Garcia-Blas and David E.
Singh and Florin Isaila and Alexey Lastovetsky and
Thomas Fahringer and Radu Prodan and Peter Zangerl and
Christi Symeonidou and Afshin Fassihi and Horacio
P{\'e}rez-S{\'a}nchez",
title = "Acceleration of {MPI} mechanisms for sustainable {HPC}
applications",
journal = j-SUPERFRI,
volume = "2",
number = "2",
pages = "28--45",
month = "????",
year = "2015",
CODEN = "????",
ISSN = "2409-6008 (print), 2313-8734 (electronic)",
bibdate = "Sat Nov 11 07:15:27 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/superfri.bib",
URL = "http://superfri.org/superfri/article/view/35",
acknowledgement = ack-nhfb,
fjournal = "Supercomputing Frontiers and Innovations",
journal-URL = "http://superfri.org/superfri/issue/archive",
}
@Article{Casanova:2015:SMA,
author = "Henri Casanova and Fr{\'e}d{\'e}ric Desprez and George
S. Markomanolis and Fr{\'e}d{\'e}ric Suter",
title = "Simulation of {MPI} applications with time-independent
traces",
journal = j-CCPE,
volume = "27",
number = "5",
pages = "1145--1168",
day = "10",
month = apr,
year = "2015",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.3278",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Sat Jul 25 19:54:07 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "11 Apr 2014",
}
@Article{Casanova:2015:TMS,
author = "Henri Casanova and Anshul Gupta and Fr{\'e}d{\'e}ric
Suter",
title = "Toward More Scalable Off-Line Simulations of {MPI}
Applications",
journal = j-PARALLEL-PROCESS-LETT,
volume = "25",
number = "3",
pages = "1541002",
month = sep,
year = "2015",
CODEN = "PPLTEE",
DOI = "https://doi.org/10.1142/S0129626415410029",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
ISSN-L = "0129-6264",
bibdate = "Tue May 29 09:05:25 MDT 2018",
bibsource = "http://ejournals.wspc.com.sg/ppl/;
http://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@Article{Cercos-Pita:2015:ANF,
author = "J. L. Cercos-Pita",
title = "{AQUAgpusph}, a new free {$3$D} {SPH} solver
accelerated with {OpenCL}",
journal = j-COMP-PHYS-COMM,
volume = "192",
number = "??",
pages = "295--312",
month = jul,
year = "2015",
CODEN = "CPHCBZ",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Tue Apr 21 11:56:04 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465515000909",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Chabbi:2015:BEP,
author = "Milind Chabbi and Wim Lavrijsen and Wibe de Jong and
Koushik Sen and John Mellor-Crummey and Costin Iancu",
title = "Barrier elision for production parallel programs",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "109--119",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688502",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Large scientific code bases are often composed of
several layers of runtime libraries, implemented in
multiple programming languages. In such situation,
programmers often choose conservative synchronization
patterns leading to suboptimal performance. In this
paper, we present context-sensitive dynamic
optimizations that elide barriers redundant during the
program execution. In our technique, we perform data
race detection alongside the program to identify
redundant barriers in their calling contexts; after an
initial learning, we start eliding all future instances
of barriers occurring in the same calling context. We
present an automatic on-the-fly optimization and a
multi-pass guided optimization. We apply our techniques
to NWChem --- a 6 million line computational chemistry
code written in C/C++/Fortran that uses several runtime
libraries such as Global Arrays, ComEx, DMAPP, and MPI.
Our technique elides a surprisingly high fraction of
barriers (as many as 63\%) in production runs. This
redundancy elimination translates to application
speedups as high as 14\% on 2048 cores. Our techniques
also provided valuable insight about the application
behavior, later used by NWChem developers. Overall, we
demonstrate the value of holistic context-sensitive
analyses that consider the domain science in
conjunction with the associated runtime software
stack.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PPoPP '15 conference proceedings.",
}
@Article{Cho:2015:OAO,
author = "Myeongjin Cho and Youngsun Han and Minseong Kim and
Seon Wook Kim",
title = "{O2WebCL}: an automatic {OpenCL-to-WebCL} translator
for high performance web computing",
journal = j-J-SUPERCOMPUTING,
volume = "71",
number = "6",
pages = "2050--2065",
month = jun,
year = "2015",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-014-1260-4",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Sat Aug 8 12:23:11 MDT 2015",
bibsource = "http://link.springer.com/journal/11227/71/6;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s11227-014-1260-4",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Couder-Castaneda:2015:PCM,
author = "C. Couder-Casta{\~n}eda and H. Barrios-Pi{\~n}a and I.
Gitler and M. Arroyo",
title = "Performance of a Code Migration for the Simulation of
Supersonic Ejector Flow to {SMP}, {MIC}, and {GPU}
Using {OpenMP}, {OpenMP+LEO}, and {OpenACC}
Directives",
journal = j-SCI-PROG,
volume = "2015",
number = "??",
pages = "739107:1--739107:20",
month = "????",
year = "2015",
CODEN = "SCIPEV",
DOI = "https://doi.org/10.1155/2015/739107",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Tue Sep 20 07:53:44 MDT 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
URL = "https://www.hindawi.com/journals/sp/2015/739107/",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "https://www.hindawi.com/journals/sp/",
journalabr = "Sci. Prog",
}
@Article{Ebrahimirad:2015:EAS,
author = "Vahid Ebrahimirad and Maziar Goudarzi and Aboozar
Rajabi",
title = "Energy-Aware Scheduling for Precedence-Constrained
Parallel Virtual Machines in Virtualized Data Centers",
journal = j-J-GRID-COMP,
volume = "13",
number = "2",
pages = "233--253",
month = jun,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1007/s10723-015-9327-x",
ISSN = "1570-7873 (print), 1572-9184 (electronic)",
ISSN-L = "1570-7873",
bibdate = "Sat Aug 8 12:08:29 MDT 2015",
bibsource = "http://link.springer.com/journal/10723/13/2;
http://www.math.utah.edu/pub/tex/bib/jgridcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
URL = "http://link.springer.com/article/10.1007/s10723-015-9327-x",
acknowledgement = ack-nhfb,
fjournal = "Journal of Grid Computing",
journal-URL = "http://link.springer.com/journal/10723",
}
@Article{Emani:2015:CDM,
author = "Murali Krishna Emani and Michael O'Boyle",
title = "Celebrating diversity: a mixture of experts approach
for runtime mapping in dynamic environments",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "499--508",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737999",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Matching program parallelism to platform parallelism
using thread selection is difficult when the
environment and available resources dynamically change.
Existing compiler or runtime approaches are typically
based on a one-size fits all policy. There is little
ability to either evaluate or adapt the policy when
encountering new external workloads or hardware
resources. This paper focuses on selecting the best
number of threads for a parallel application in dynamic
environments. It develops a new scheme based on a
mixture of experts approach. It learns online which, of
a number of existing policies, or experts, is best
suited for a particular environment without having to
try out each policy. It does this by using a novel
environment predictor as a proxy for the quality of an
expert thread selection policy. Additional expert
policies can easily be added and are selected only when
appropriate. We evaluate our scheme in environments
with varying external workloads and hardware
resources. We then consider the case when workloads use
affinity scheduling or are themselves adaptive and show
that our approach, in all cases, outperforms existing
schemes and surprisingly improves workload performance.
On average, we improve 1.66x over OpenMP default, 1.34x
over an online scheme, 1.25x over an offline policy and
1.2x over a state-of-art analytic model. Determining
the right number and type of experts is an open problem
and our initial analysis shows that adding more experts
improves accuracy and performance.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PLDI '15 conference proceedings.",
}
@Article{Fabeiro:2015:AGO,
author = "Jorge F. Fabeiro and Diego Andrade and Basilio B.
Fraguela and Ram{\'o}n Doallo",
title = "Automatic Generation of Optimized {OpenCL} Codes Using
{OCLoptimizer}",
journal = j-COMP-J,
volume = "58",
number = "11",
pages = "3057--3073",
month = nov,
year = "2015",
CODEN = "CMPJA6",
DOI = "https://doi.org/10.1093/comjnl/bxv038",
ISSN = "0010-4620 (print), 1460-2067 (electronic)",
ISSN-L = "0010-4620",
bibdate = "Tue Nov 17 08:06:33 MST 2015",
bibsource = "http://comjnl.oxfordjournals.org/content/58/11.toc;
http://www.math.utah.edu/pub/tex/bib/compj2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Computer Journal",
journal-URL = "http://comjnl.oxfordjournals.org/",
onlinedate = "June 2, 2015",
}
@Article{Fang:2015:EVD,
author = "Jianbin Fang and Ana Lucia Varbanescu and Xiangke Liao
and Henk Sips",
title = "Evaluating vector data type usage in {OpenCL}
kernels",
journal = j-CCPE,
volume = "27",
number = "17",
pages = "4586--4602",
day = "10",
month = dec,
year = "2015",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.3424",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Tue Feb 9 06:13:20 MST 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "23 Oct 2014",
}
@Article{Ferretti:2015:MCH,
author = "Marco Ferretti and Mirto Musci and Luigi Santangelo",
title = "{MPI--CMS}: a hybrid parallel approach to geometrical
motif search in proteins",
journal = j-CCPE,
volume = "27",
number = "18",
pages = "5500--5516",
day = "25",
month = dec,
year = "2015",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.3588",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Tue Feb 9 06:13:20 MST 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "12 Aug 2015",
}
@Article{Filipovic:2015:OCC,
author = "Ji{\v{r}}{\'\i} Filipovi{\v{c}} and Mat{\'u}{\v{s}} Madzin and Jan
Fousek and Lud{\v{e}}k Matyska",
title = "Optimizing {CUDA} code by kernel fusion: application
on {BLAS}",
journal = j-J-SUPERCOMPUTING,
volume = "71",
number = "10",
pages = "3934--3957",
month = oct,
year = "2015",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-015-1483-z",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Tue Sep 29 10:07:24 MDT 2015",
bibsource = "http://link.springer.com/journal/11227/71/10;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s11227-015-1483-z",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Galizia:2015:MCL,
author = "Antonella Galizia and Daniele D'Agostino and Andrea
Clematis",
title = "An {MPI--CUDA} library for image processing on {HPC}
architectures",
journal = j-J-COMPUT-APPL-MATH,
volume = "273",
number = "??",
pages = "414--427",
day = "1",
month = jan,
year = "2015",
CODEN = "JCAMDI",
ISSN = "0377-0427 (print), 1879-1778 (electronic)",
ISSN-L = "0377-0427",
bibdate = "Sat Feb 25 13:34:46 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jcomputapplmath2015.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0377042714002374",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computational and Applied Mathematics",
journal-URL = "http://www.sciencedirect.com/science/journal/03770427",
}
@Article{Garain:2015:CCF,
author = "Sudip Garain and Dinshaw S. Balsara and John Reid",
title = "Comparing {Coarray Fortran (CAF)} with {MPI} for
several structured mesh {PDE} applications",
journal = j-J-COMPUT-PHYS,
volume = "297",
number = "??",
pages = "237--253",
day = "15",
month = sep,
year = "2015",
CODEN = "JCTPAH",
ISSN = "0021-9991 (print), 1090-2716 (electronic)",
ISSN-L = "0021-9991",
bibdate = "Sat Jul 25 09:25:55 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
http://www.math.utah.edu/pub/tex/bib/jcomputphys2015.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S002199911500354X",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computational Physics",
journal-URL = "http://www.sciencedirect.com/science/journal/00219991/",
}
@Article{Gidra:2015:NGC,
author = "Lokesh Gidra and Ga{\"e}l Thomas and Julien Sopena and
Marc Shapiro and Nhan Nguyen",
title = "{NumaGiC}: a Garbage Collector for Big Data on Big
{NUMA} Machines",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "1",
pages = "661--673",
month = mar,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2786763.2694361",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Wed Jun 3 11:27:38 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "On contemporary cache-coherent Non-Uniform Memory
Access (ccNUMA) architectures, applications with a
large memory footprint suffer from the cost of the
garbage collector (GC), because, as the GC scans the
reference graph, it makes many remote memory accesses,
saturating the interconnect between memory nodes. We
address this problem with NumaGiC, a GC with a
mostly-distributed design. In order to maximise memory
access locality during collection, a GC thread avoids
accessing a different memory node, instead notifying a
remote GC thread with a message; nonetheless, NumaGiC
avoids the drawbacks of a pure distributed design,
which tends to decrease parallelism. We compare NumaGiC
with Parallel Scavenge and NAPS on two different ccNUMA
architectures running on the Hotspot Java Virtual
Machine of OpenJDK 7. On Spark and Neo4j, two
industry-strength analytics applications, with heap
sizes ranging from 160GB to 350GB, and on SPECjbb2013
and SPECjbb2005, NumaGiC improves overall performance by
up to 45\% over NAPS (up to 94\% over Parallel
Scavenge), and increases the performance of the
collector itself by up to 3.6x over NAPS (up to 5.4x
over Parallel Scavenge).",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
remark = "ASPLOS'15 conference proceedings.",
}
@Article{Havran:2015:EBT,
author = "Vlastimil Havran and Petr Egert",
title = "Extensions to bidirectional texture function
compression with multi-level vector quantization in
{OpenCL}",
journal = j-COMPUTERS-AND-GRAPHICS,
volume = "48",
number = "??",
pages = "1--10",
month = may,
year = "2015",
CODEN = "COGRD2",
ISSN = "0097-8493 (print), 1873-7684 (electronic)",
ISSN-L = "0097-8493",
bibdate = "Fri Apr 24 17:46:30 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compgraph.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0097849315000060",
acknowledgement = ack-nhfb,
fjournal = "Computers \& Graphics",
journal-URL = "http://www.sciencedirect.com/science/journal/00978493/",
}
@Article{Hoefler:2015:RMA,
author = "Torsten Hoefler and James Dinan and Rajeev Thakur and
Brian Barrett and Pavan Balaji and William Gropp and
Keith Underwood",
title = "Remote Memory Access Programming in {MPI-3}",
journal = j-TOPC,
volume = "2",
number = "2",
pages = "9:1--9:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2780584",
ISSN = "2329-4949 (print), 2329-4957 (electronic)",
ISSN-L = "2329-4949",
bibdate = "Fri Aug 7 10:22:35 MDT 2015",
bibsource = "http://topc.acm.org/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/topc.bib",
abstract = "The Message Passing Interface (MPI) 3.0 standard,
introduced in September 2012, includes a significant
update to the one-sided communication interface, also
known as remote memory access (RMA). In particular, the
interface has been extended to better support popular
one-sided and global-address-space parallel programming
models to provide better access to hardware performance
features and enable new data-access modes. We present
the new RMA interface and specify formal axiomatic
models for data consistency and access semantics. Such
models can help users reason about details of the
semantics that are hard to extract from the English
prose in the standard. It also fosters the development
of tools and compilers, enabling them to automatically
analyze, optimize, and debug RMA programs.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Parallel Computing",
journal-URL = "http://dl.acm.org/citation.cfm?id=2632163",
}
@Article{Jaaskelainen:2015:PPP,
author = "Pekka J{\"a}{\"a}skel{\"a}inen and Carlos {S{\'a}nchez
de La Lama} and Erik Schnetter and Kalle Raiskila and
Jarmo Takala and Heikki Berg",
title = "{pocl}: A Performance-Portable {OpenCL}
Implementation",
journal = j-INT-J-PARALLEL-PROG,
volume = "43",
number = "5",
pages = "752--785",
month = oct,
year = "2015",
CODEN = "IJPPE5",
DOI = "https://doi.org/10.1007/s10766-014-0320-y",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Sat Aug 8 12:34:17 MDT 2015",
bibsource = "http://link.springer.com/journal/10766/43/5;
http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s10766-014-0320-y",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Jaeger:2015:FGD,
author = "Julien Jaeger and Patrick Carribault and Marc
P{\'e}rache",
title = "Fine-grain data management directory for {OpenMP 4.0}
and {OpenACC}",
journal = j-CCPE,
volume = "27",
number = "6",
pages = "1528--1539",
day = "25",
month = apr,
year = "2015",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.3352",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Sat Jul 25 19:54:07 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "13 Aug 2014",
}
@Article{Jo:2015:ALM,
author = "Gangwon Jo and Jeongho Nah and Jun Lee and Jungwon Kim
and Jaejin Lee",
title = "Accelerating {LINPACK} with {MPI-OpenCL} on Clusters
of Multi-{GPU} Nodes",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "26",
number = "7",
pages = "1814--1825",
month = jul,
year = "2015",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2014.2321742",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Mon Aug 3 11:58:51 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.computer.org/csdl/trans/td/2015/07/06846313-abs.html",
abstract-URL = "http://www.computer.org/csdl/trans/td/2015/07/06846313-abs.html",
acknowledgement = ack-nhfb,
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@Article{Kaliman:2015:SNU,
author = "Ilya A. Kaliman and Lyudmila V. Slipchenko",
title = "Software News and Updates: Hybrid {MPI\slash OpenMP}
parallelization of the effective fragment potential
method in the {{\tt libefp}} software library",
journal = j-J-COMPUT-CHEM,
volume = "36",
number = "2",
pages = "129--135",
day = "15",
month = jan,
year = "2015",
CODEN = "JCCHDD",
DOI = "https://doi.org/10.1002/jcc.23772",
ISSN = "0192-8651 (print), 1096-987X (electronic)",
ISSN-L = "0192-8651",
bibdate = "Fri Mar 6 15:50:38 MST 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computational Chemistry",
journal-URL = "http://www.interscience.wiley.com/jpages/0192-8651",
onlinedate = "13 Nov 2014",
}
@Article{Karami:2015:SPA,
author = "Ali Karami and Farshad Khunjush and Seyyed Ali
Mirsoleimani",
title = "A statistical performance analyzer framework for
{OpenCL} kernels on {Nvidia GPUs}",
journal = j-J-SUPERCOMPUTING,
volume = "71",
number = "8",
pages = "2900--2921",
month = aug,
year = "2015",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-014-1338-z",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Sat Aug 8 12:23:12 MDT 2015",
bibsource = "http://link.springer.com/journal/11227/71/8;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s11227-014-1338-z",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Kim:2015:OBU,
author = "Jungwon Kim and Seyong Lee and Jeffrey S. Vetter",
title = "An {OpenACC}-based unified programming model for
multi-accelerator systems",
journal = j-SIGPLAN,
volume = "50",
number = "8",
pages = "257--258",
month = aug,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858788.2688531",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:42 MST 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "This paper proposes a novel SPMD programming model of
OpenACC. Our model integrates the different
granularities of parallelism from vector-level
parallelism to node-level parallelism into a single,
unified model based on OpenACC. It allows programmers
to write programs for multiple accelerators using a
uniform programming model whether they are in shared or
distributed memory systems. We implement a prototype of
our model and evaluate its performance with a GPU-based
supercomputer using three benchmark applications.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PPoPP '15 conference proceedings.",
}
@InProceedings{Klawonn:2015:HMO,
author = "Axel Klawonn and Martin Lanser and Oliver Rheinbach
and Holger Stengel and Gerhard Wellein",
title = "Hybrid {MPI\slash OpenMP} Parallelization in
{FETI--DP} Methods",
crossref = "Mehl:2015:RTC",
volume = "105",
pages = "67--84",
year = "2015",
DOI = "https://doi.org/10.1007/978-3-319-22997-3_4",
bibdate = "Sat Dec 12 10:22:10 MST 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncse.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/chapter/10.1007/978-3-319-22997-3_4/",
acknowledgement = ack-nhfb,
book-DOI = "https://doi.org/10.1007/978-3-319-22997-3",
book-URL = "http://www.springerlink.com/content/978-3-319-22997-3",
}
@Article{Komura:2015:OPS,
author = "Yukihiro Komura",
title = "{OpenACC} programs of the {Swendsen--Wang}
multi-cluster spin flip algorithm",
journal = j-COMP-PHYS-COMM,
volume = "197",
number = "??",
pages = "298--303",
month = dec,
year = "2015",
CODEN = "CPHCBZ",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Wed Nov 11 06:05:22 MST 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465515003197",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Kouzinopoulos:2015:MSM,
author = "Charalampos S. Kouzinopoulos and Panagiotis D.
Michailidis and Konstantinos G. Margaritis",
title = "Multiple String Matching on a {GPU} using {CUDAs}",
journal = j-SCPE,
volume = "16",
number = "2",
pages = "121--138",
month = "????",
year = "2015",
CODEN = "????",
ISSN = "1895-1767",
ISSN-L = "1895-1767",
bibdate = "Mon Jan 7 06:46:46 MST 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/scpe.bib;
http://www.math.utah.edu/pub/tex/bib/string-matching.bib",
URL = "https://www.scpe.org/index.php/scpe/article/view/1085",
acknowledgement = ack-nhfb,
fjournal = "Scalable Computing: Practice and Experience",
journal-URL = "http://www.scpe.org/",
}
@Article{Kovanen:2015:TAC,
author = "Janne Kovanen and Tapani Sarjakoski",
title = "Tilewise Accumulated Cost Surface Computation with
Graphics Processing Units",
journal = j-TSAS,
volume = "1",
number = "2",
pages = "8:1--8:27",
month = nov,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2803172",
ISSN = "2374-0353 (print), 2374-0361 (electronic)",
ISSN-L = "2374-0353",
bibdate = "Thu Jun 15 14:51:01 MDT 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/tsas.bib",
URL = "http://dl.acm.org/citation.cfm?id=2803172",
abstract = "Accumulated cost surfaces are used in a variety of
fields that employ spatial analysis. Several algorithms
have been suggested in the past for solving them
efficiently or with minimal errors. Meanwhile, a new
wave on the technological frontier has brought about
general-purpose computing on GPUs. In this article, we
describe how accumulated cost surfaces can be solved
with CUDA. To verify the performance of our solution,
we performed an experimental comparison against
implementations run on a CPU. Our results with
realistic cost models indicate that the move to GPUs
can engender a speed-up of an order of magnitude.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Spatial Algorithms and Systems
(TSAS)",
journal-URL = "http://dl.acm.org/pub.cfm?id=J1514",
}
@Article{Kramer:2015:SET,
author = "Stephan C. Kramer and Johannes Hagemann",
title = "{SciPAL}: Expression Templates and Composition Closure
Objects for High Performance Computational Physics with
{CUDA} and {OpenMP}",
journal = j-TOPC,
volume = "1",
number = "2",
pages = "15:1--15:??",
month = jan,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2686886",
ISSN = "2329-4949 (print), 2329-4957 (electronic)",
ISSN-L = "2329-4949",
bibdate = "Wed Feb 18 16:46:00 MST 2015",
bibsource = "http://topc.acm.org/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/topc.bib",
abstract = "We present SciPAL (scientific parallel algorithms
library), a C++-based, hardware-independent
open-source library. Its core is a domain-specific
embedded language for numerical linear algebra. The
main fields of application are finite element
simulations, coherent optics and the solution of
inverse problems. Using SciPAL, algorithms can be stated
in a mathematically intuitive way in terms of matrix
and vector operations. Existing algorithms can easily
be adapted to GPU-based computing by proper template
specialization. Our library is compatible with the
finite element library deal.II and provides a port of
deal.II's most frequently used linear algebra classes
to CUDA (NVidia's extension of the programming
languages C and C++ for programming their GPUs).
SciPAL's operator-based API for BLAS operations
particularly aims at simplifying the usage of NVidia's
CUBLAS. For non-BLAS array arithmetic SciPAL's
expression templates are able to generate CUDA kernels
at compile time. We demonstrate the benefits of SciPAL
using the iterative principal component analysis as
example which is the core algorithm for the
spike-sorting problem in neuroscience.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Parallel Computing",
journal-URL = "http://dl.acm.org/citation.cfm?id=2632163",
}
@Article{Laguna:2015:DPF,
author = "Ignacio Laguna and Dong H. Ahn and Bronis R. de
Supinski and Saurabh Bagchi and Todd Gamblin",
title = "Diagnosis of Performance Faults in Large Scale {MPI}
Applications via Probabilistic Progress-Dependence
Inference",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "26",
number = "5",
pages = "1280--1289",
month = may,
year = "2015",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2014.2314100",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Thu Jun 4 19:34:11 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://csdl.computer.org/csdl/trans/td/2015/05/06803050-abs.html",
abstract-URL = "http://csdl.computer.org/csdl/trans/td/2015/05/06803050-abs.html",
acknowledgement = ack-nhfb,
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@Article{Lashgar:2015:CSR,
author = "Ahmad Lashgar and Ebad Salehi and Amirali Baniasadi",
title = "A Case Study in Reverse Engineering {GPGPUs}:
Outstanding Memory Handling Resources",
journal = j-COMP-ARCH-NEWS,
volume = "43",
number = "4",
pages = "15--21",
month = sep,
year = "2015",
CODEN = "CANED2",
DOI = "https://doi.org/10.1145/2927964.2927968",
ISSN = "0163-5964 (print), 1943-5851 (electronic)",
ISSN-L = "0163-5964",
bibdate = "Fri Apr 22 17:03:53 MDT 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
abstract = "During recent years, GPU micro-architectures have
changed dramatically, evolving into powerful many-core
deep-multithreaded platforms for parallel workloads.
While important micro-architectural modifications
continue to appear in every new generation of these
processors, unfortunately, little is known about the
details of these innovative designs. One of the key
questions in understanding GPUs is how they deal with
outstanding memory misses. Our goal in this study is to
find answers to this question. To this end, we develop
a set of micro-benchmarks in CUDA to understand the
outstanding memory requests handling resources.
Particularly, we study two NVIDIA GPGPUs (Fermi and
Kepler) and estimate their capability in handling
outstanding memory requests. We show that Kepler can
issue nearly 32X higher number of outstanding memory
requests, compared to Fermi. We explain this
enhancement by Kepler's architectural modifications in
outstanding memory request handling resources.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGARCH Computer Architecture News",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J89",
remark = "HEART '15 conference proceedings.",
}
@Article{Lee:2015:GCE,
author = "J. Lee and D. H. Woo and H. Kim and M. Azimi",
title = "{GREEN} Cache: Exploiting the Disciplined Memory Model
of {OpenCL} on {GPUs}",
journal = j-IEEE-TRANS-COMPUT,
volume = "64",
number = "11",
pages = "3167--3180",
month = nov,
year = "2015",
CODEN = "ITCOB4",
DOI = "https://doi.org/10.1109/TC.2015.2395435",
ISSN = "0018-9340 (print), 1557-9956 (electronic)",
ISSN-L = "0018-9340",
bibdate = "Tue Oct 13 06:51:52 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Computers",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Lee:2015:OPE,
author = "Joo Hwan Lee and Nimit Nigania and Hyesoon Kim and
Kaushik Patel and Hyojong Kim",
title = "{OpenCL} Performance Evaluation on Modern Multicore
{CPUs}",
journal = j-SCI-PROG,
volume = "2015",
number = "??",
pages = "859491:1--859491:20",
month = "????",
year = "2015",
CODEN = "SCIPEV",
DOI = "https://doi.org/10.1155/2015/859491",
ISSN = "1058-9244 (print), 1875-919X (electronic)",
ISSN-L = "1058-9244",
bibdate = "Tue Sep 20 07:53:44 MDT 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
URL = "https://www.hindawi.com/journals/sp/2015/859491/",
acknowledgement = ack-nhfb,
fjournal = "Scientific Programming",
journal-URL = "https://www.hindawi.com/journals/sp/",
journalabr = "Sci. Prog",
}
@Article{Li:2015:AMR,
author = "Jiansen Li and Jianqi Sun and Ying Song and Jun Zhao",
title = "Accelerating {MRI} reconstruction via
three-dimensional dual-dictionary learning using
{CUDA}",
journal = j-J-SUPERCOMPUTING,
volume = "71",
number = "7",
pages = "2381--2396",
month = jul,
year = "2015",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-015-1386-z",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Sat Aug 8 12:23:11 MDT 2015",
bibsource = "http://link.springer.com/journal/11227/71/7;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/article/10.1007/s11227-015-1386-z",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Lidbury:2015:MCC,
author = "Christopher Lidbury and Andrei Lascu and Nathan Chong
and Alastair F. Donaldson",
title = "Many-core compiler fuzzing",
journal = j-SIGPLAN,
volume = "50",
number = "6",
pages = "65--76",
month = jun,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2813885.2737986",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:41 MST 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We address the compiler correctness problem for
many-core systems through novel applications of fuzz
testing to OpenCL compilers. Focusing on two methods
from prior work, random differential testing and
testing via equivalence modulo inputs (EMI), we present
several strategies for random generation of
deterministic, communicating OpenCL kernels, and an
injection mechanism that allows EMI testing to be
applied to kernels that otherwise exhibit little or no
dynamically-dead code. We use these methods to conduct
a large, controlled testing campaign with respect to 21
OpenCL (device, compiler) configurations, covering a
range of CPU, GPU, accelerator, FPGA and emulator
implementations. Our study provides independent
validation of claims in prior work related to the
effectiveness of random differential testing and EMI
testing, proposes novel methods for lifting these
techniques to the many-core setting and reveals a
significant number of OpenCL compiler bugs in
commercial implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PLDI '15 conference proceedings.",
}
@Article{Lopez:2015:PBV,
author = "Hugo A. L{\'o}pez and Eduardo R. B. Marques and
Francisco Martins and Nicholas Ng and C{\'e}sar Santos
and Vasco Thudichum Vasconcelos and Nobuko Yoshida",
title = "Protocol-based verification of message-passing
parallel programs",
journal = j-SIGPLAN,
volume = "50",
number = "10",
pages = "280--298",
month = oct,
year = "2015",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/2858965.2814302",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Tue Feb 16 12:01:43 MST 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present ParTypes, a type-based methodology for the
verification of Message Passing Interface (MPI)
programs written in the C programming language. The aim
is to statically verify programs against protocol
specifications, enforcing properties such as fidelity
and absence of deadlocks. We develop a protocol
language based on a dependent type system for
message-passing parallel programs, which includes
various communication operators, such as point-to-point
messages, broadcast, reduce, array scatter and gather.
For the verification of a program against a given
protocol, the protocol is first translated into a
representation read by VCC, a software verifier for C.
We successfully verified several MPI programs in a
running time that is independent of the number of
processes or other input parameters. This contrasts
with alternative techniques, notably model checking and
runtime verification, that suffer from the
state-explosion problem or that otherwise depend on
parameters to the program itself. We experimentally
evaluated our approach against state-of-the-art tools
for MPI to conclude that our approach offers a scalable
solution.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "OOPSLA '15 conference proceedings.",
}
@Article{Lorentz:2015:AMS,
author = "Istvan Lorentz and Razvan Andonie and Levente
Fabry-Asztalos",
title = "Accelerating Molecular Structure Determination Based
on Inter-Atomic Distances Using {OpenCL}",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "26",
number = "12",
pages = "3250--3263",
month = dec,
year = "2015",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2014.2385712",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Tue Nov 17 06:28:07 MST 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://csdl.computer.org/csdl/trans/td/2015/12/06995963-abs.html",
abstract-URL = "http://csdl.computer.org/csdl/trans/td/2015/12/06995963-abs.html",
acknowledgement = ack-nhfb,
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@Article{Lotfi:2015:AAC,
author = "Atieh Lotfi and Abbas Rahimi and Luca Benini and
Rajesh K. Gupta",
title = "Aging-Aware Compilation for {GP-GPUs}",
journal = j-TACO,
volume = "12",
number = "2",
pages = "24:1--24:??",
month = jul,
year = "2015",
CODEN = "????",
DOI = "https://doi.org/10.1145/2778984",
ISSN = "1544-3566 (print), 1544-3973 (electronic)",
ISSN-L = "1544-3566",
bibdate = "Fri Aug 7 09:46:00 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/taco.bib",
abstract = "General-purpose graphic processing units (GP-GPUs)
offer high computational throughput using thousands of
integrated processing elements (PEs). These PEs are
stressed during workload execution, and negative bias
temperature instability (NBTI) adversely affects their
reliability by introducing new delay-induced faults.
However, the effect of these delay variations is not
uniformly spread across the PEs: some are affected more
--- hence less reliable --- than others. This variation
causes significant reduction in the lifetime of GP-GPU
parts. In this article, we address the problem of
``wear leveling'' across processing units to mitigate
lifetime uncertainty in GP-GPUs. We propose innovations
in the static compiled code that can improve healing in
PEs and stream cores (SCs) based on their degradation
status. PE healing is a fine-grained very long
instruction word (VLIW) slot assignment scheme that
balances the stress of instructions across the PEs
within an SC. SC healing is a coarse-grained workload
allocation scheme that distributes workload across SCs
in GP-GPUs. Both schemes share a common property: they
adaptively shift workload from less reliable units to
more reliable units, either spatially or temporally.
These software schemes are based on online calibration
with NBTI monitoring that equalizes the expected
lifetime of PEs and SCs by regenerating adaptive
compiled codes to respond to the specific health state
of the GP-GPUs. We evaluate the effectiveness of the
proposed schemes for various OpenCL kernels from the
AMD APP SDK on Evergreen and Southern Island GPU
architectures. The aging-aware healthy kernels
generated by the PE (or SC) healing scheme reduce
NBTI-induced voltage threshold shift by 30\% (77\% in
the case of SCs), with no (moderate) performance
penalty compared to the naive kernels.",
acknowledgement = ack-nhfb,
articleno = "24",
fjournal = "ACM Transactions on Architecture and Code Optimization
(TACO)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Markidis:2015:OAN,
author = "Stefano Markidis and Jing Gong and Michael Schliephake
and Erwin Laure and Alistair Hart and David Henty and
Katherine Heisey and Paul Fischer",
title = "{OpenACC} acceleration of the {Nek5000} spectral
element code",
journal = j-IJHPCA,
volume = "29",
number = "3",
pages = "311--319",
month = aug,
year = "2015",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Apr 4 14:51:30 MDT 2017",
bibsource = "http://hpc.sagepub.com/;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "http://hpc.sagepub.com/content/by/year",
}
@Article{Martin:2015:EPM,
author = "Gonzalo Mart{\'\i}n and David E. Singh and
Maria-Cristina Marinescu and Jes{\'u}s Carretero",
title = "Enhancing the performance of malleable {MPI}
applications by using performance-aware dynamic
reconfiguration",
journal = j-PARALLEL-COMPUTING,
volume = "46",
number = "??",
pages = "60--77",
month = jul,
year = "2015",
CODEN = "PACOEJ",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Wed Jun 17 11:37:27 MDT 2015",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819115000642",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191/",
}
@Article{Mehta:2015:MTP,
  author =       "Kshitij Mehta and Edgar Gabriel",
  title =        "Multi-Threaded Parallel {I/O} for {OpenMP}
                 Applications",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "43",
  number =       "2",
  pages =        "286--309",
  month =        apr,
  year =         "2015",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-014-0306-9",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Sat Aug 8 12:34:16 MDT 2015",
  bibsource =    "http://link.springer.com/journal/10766/43/2;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s10766-014-0306-9",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Momeni:2015:EEO,
  author =       "Amir Momeni and Hamed Tabkhi and Yash Ukidave and
                 Gunar Schirner and David Kaeli",
  title =        "Exploring the Efficiency of the {OpenCL} Pipe Semantic
                 on an {FPGA}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "43",
  number =       "4",
  pages =        "52--57",
  month =        sep,
  year =         "2015",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2927964.2927974",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Fri Apr 22 17:03:53 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "This paper evaluates the potential benefits of
                 leveraging the OpenCL Pipe semantic to accelerate
                 FPGA-based applications. Our work focuses on streaming
                 applications in the embedded vision processing domain.
                 These applications are well-suited for concurrent
                 kernel execution support and inter-kernel communication
                 enabled by using OpenCL pipes. We analyze the impact of
                 multiple design factors and application optimizations
                 to improve the performance offered by OpenCL Pipes. The
                 design tradeoffs considered include: the execution
                 granularity across kernels, the rate and volume of data
                 transfers, and the Pipe size. For our case study
                 application of vision flow, we observe a 2.8X increase in
                 throughput for tuned pipelined kernels, as compared to
                 non-pipelined execution. In addition, we propose a
                 novel mechanism to efficiently capture the behavior for
                 2-dimensional (2D) vision algorithms to benefit
                 Pipe-based execution.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "HEART '15 conference proceedings.",
}
@Article{Muddukrishna:2015:LAT,
  author =       "Ananya Muddukrishna and Peter A. Jonsson and Mats
                 Brorsson",
  title =        "Locality-Aware Task Scheduling and Data Distribution
                 for {OpenMP} Programs on {NUMA} Systems and Manycore
                 Processors",
  journal =      j-SCI-PROG,
  volume =       "2015",
  number =       "??",
  pages =        "981759:1--981759:16",
  month =        "????",
  year =         "2015",
  CODEN =        "SCIPEV",
  DOI =          "https://doi.org/10.1155/2015/981759",
  ISSN =         "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L =       "1058-9244",
  bibdate =      "Tue Sep 20 07:53:44 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
  URL =          "https://www.hindawi.com/journals/sp/2015/981759/",
  acknowledgement = ack-nhfb,
  fjournal =     "Scientific Programming",
  journal-URL =  "https://www.hindawi.com/journals/sp/",
  journalabr =   "Sci. Prog",
}
@Article{Muralidharan:2015:COP,
  author =       "Saurav Muralidharan and Michael Garland and Bryan
                 Catanzaro and Albert Sidelnik and Mary Hall",
  title =        "A collection-oriented programming model for
                 performance portability",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "8",
  pages =        "263--264",
  month =        aug,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2858788.2688537",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:42 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "This paper describes Surge, a collection-oriented
                 programming model that enables programmers to compose
                 parallel computations using nested high-level data
                 collections and operators. Surge exposes a code
                 generation interface, decoupled from the core
                 computation, that enables programmers and autotuners to
                 easily generate multiple implementations of the same
                 computation on various parallel architectures such as
                 multi-core CPUs and GPUs. By decoupling computations
                 from architecture-specific implementation, programmers
                 can target multiple architectures more easily, and
                 generate a search space that facilitates optimization
                 and customization for specific architectures. We
                 express in Surge four real-world benchmarks from
                 domains such as sparse linear-algebra and machine
                 learning and from the same performance-portable
                 specification, generate OpenMP and CUDA C++
                 implementations. Surge generates efficient, scalable
                 code which achieves up to 1.32x speedup over
                 handcrafted, well-optimized CUDA code.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '15 conference proceedings.",
}
@Article{Obrecht:2015:PEO,
  author =       "Christian Obrecht and Bernard Tourancheau and
                 Fr{\'e}d{\'e}ric Kuznik",
  title =        "Performance Evaluation of an {OpenCL} Implementation
                 of the {Lattice Boltzmann Method} on the {Intel Xeon
                 Phi}",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "25",
  number =       "3",
  pages =        "1541001",
  month =        sep,
  year =         "2015",
  CODEN =        "PPLTEE",
  DOI =          "https://doi.org/10.1142/S0129626415410017",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L =       "0129-6264",
  bibdate =      "Tue May 29 09:05:25 MDT 2018",
  bibsource =    "http://ejournals.wspc.com.sg/ppl/;
                 http://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
}
@Article{Orr:2015:SUR,
  author =       "Marc S. Orr and Shuai Che and Ayse Yilmazer and
                 Bradford M. Beckmann and Mark D. Hill and David A.
                 Wood",
  title =        "Synchronization Using Remote-Scope Promotion",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "43",
  number =       "1",
  pages =        "73--86",
  month =        mar,
  year =         "2015",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/2786763.2694350",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Wed Jun 3 11:27:38 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Heterogeneous system architecture (HSA) and OpenCL
                 define scoped synchronization to facilitate low
                 overhead communication across a subset of threads.
                 Scoped synchronization works well for static sharing
                 patterns, where consumer threads are known a priori. It
                 works poorly for dynamic sharing patterns (e.g., work
                 stealing) where programmers cannot use a faster small
                 scope due to the rare possibility that the work is
                 stolen by a thread in a distant slower scope. This puts
                 programmers in a conundrum: optimize the common case by
                 synchronizing at a faster small scope or use work
                 stealing at a slower large scope. In this paper, we
                 propose to extend scoped synchronization with
                 remote-scope promotion. This allows the most frequent
                 sharers to synchronize through a small scope.
                 Infrequent sharers synchronize by promoting that remote
                 small scope to a larger shared scope. Synchronization
                 using remote-scope promotion provides performance
                 robustness for dynamic workloads, where the benefits
                 provided by scoped synchronization and work stealing
                 are hard to anticipate. Compared to a na{\"\i}ve
                 baseline, static scoped synchronization alone achieves
                 a 1.07x speedup on average and dynamic work stealing
                 alone achieves a 1.18x speedup on average. In contrast,
                 synchronization using remote-scope promotion achieves a
                 robust 1.25x speedup on average, across a diverse set
                 of graph benchmarks and inputs.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "ASPLOS '15 conference proceedings.",
}
@Article{Owaida:2015:EDS,
  author =       "Muhsen Owaida and Gabriel Falcao and Joao Andrade and
                 Christos Antonopoulos and Nikolaos Bellas and Madhura
                 Purnaprajna and David Novo and Georgios Karakonstantis
                 and Andreas Burg and Paolo Ienne",
  title =        "Enhancing Design Space Exploration by Extending
                 {CPU\slash GPU} Specifications onto {FPGAs}",
  journal =      j-TECS,
  volume =       "14",
  number =       "2",
  pages =        "33:1--33:??",
  month =        mar,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2656207",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Mar 26 05:58:56 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "The design cycle for complex special-purpose computing
                 systems is extremely costly and time-consuming. It
                 involves a multiparametric design space exploration for
                 optimization, followed by design verification.
                 Designers of special purpose VLSI implementations often
                 need to explore parameters, such as optimal bitwidth
                 and data representation, through time-consuming Monte
                 Carlo simulations. A prominent example of this
                 simulation-based exploration process is the design of
                 decoders for error correcting systems, such as the
                 Low-Density Parity-Check (LDPC) codes adopted by modern
                 communication standards, which involves thousands of
                 Monte Carlo runs for each design point. Currently,
                 high-performance computing offers a wide set of
                 acceleration options that range from multicore CPUs to
                 Graphics Processing Units (GPUs) and Field Programmable
                 Gate Arrays (FPGAs). The exploitation of diverse target
                 architectures is typically associated with developing
                 multiple code versions, often using distinct
                 programming paradigms. In this context, we evaluate the
                 concept of retargeting a single OpenCL program to
                 multiple platforms, thereby significantly reducing
                 design time. A single OpenCL-based parallel kernel is
                 used without modifications or code tuning on multicore
                 CPUs, GPUs, and FPGAs. We use SOpenCL (Silicon to
                 OpenCL), a tool that automatically converts OpenCL
                 kernels to RTL in order to introduce FPGAs as a
                 potential platform to efficiently execute simulations
                 coded in OpenCL. We use LDPC decoding simulations as a
                 case study. Experimental results were obtained by
                 testing a variety of regular and irregular LDPC codes
                 that range from short/medium (e.g., 8,000 bit) to long
                 length (e.g., 64,800 bit) DVB-S2 codes. We observe
                 that, depending on the design parameters to be
                 simulated, on the dimension and phase of the design,
                 the GPU or FPGA may suit different purposes more
                 conveniently, thus providing different acceleration
                 factors over conventional multicore CPUs.",
  acknowledgement = ack-nhfb,
  articleno =    "33",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J840",
}
@Article{Power:2015:GGH,
  author =       "Jason Power and Joel Hestness and Marc S. Orr and Mark
                 D. Hill and David A. Wood",
  title =        "{gem5-gpu}: A Heterogeneous {CPU--GPU} Simulator",
  journal =      j-IEEE-COMPUT-ARCHIT-LETT,
  volume =       "14",
  number =       "1",
  pages =        "34--36",
  month =        jan # "\slash " # jun,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1109/LCA.2014.2299539",
  ISSN =         "1556-6056 (print), 1556-6064 (electronic)",
  ISSN-L =       "1556-6056",
  bibdate =      "Thu Jun 20 17:18:18 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeecomputarchitlett.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "gem5-gpu is a new simulator that models tightly
                 integrated CPU-GPU systems. It builds on gem5, a
                 modular full-system CPU simulator, and GPGPU-Sim, a
                 detailed GPGPU simulator. gem5-gpu routes most memory
                 accesses through Ruby, which is a highly configurable
                 memory system in gem5. By doing this, it is able to
                 simulate many system configurations, ranging from a
                 system with coherent caches and a single virtual
                 address space across the CPU and GPU to a system that
                 maintains separate GPU and CPU physical address spaces.
                 gem5-gpu can run most unmodified CUDA 3.2 source code.
                 Applications can launch non-blocking kernels, allowing
                 the CPU and GPU to execute simultaneously. We present
                 gem5-gpu's software architecture and a brief
                 performance validation. We also discuss possible
                 extensions to the simulator. gem5-gpu is open source
                 and available at gem5-gpu.cs.wisc.edu.",
  acknowledgement = ack-nhfb,
  affiliation =  "Power, J (Reprint Author), Univ Wisconsin, Dept Comp
                 Sci, 1210 W Dayton St, Madison, WI 53706 USA. Power,
                 Jason; Hestness, Joel; Orr, Marc S.; Hill, Mark D.;
                 Wood, David A., Univ Wisconsin, Dept Comp Sci, Madison,
                 WI 53706 USA.",
  author-email = "powerjg@cs.wisc.edu hestness@cs.wisc.edu
                 morr@cs.wisc.edu markhill@cs.wisc.edu
                 david@cs.wisc.edu",
  da =           "2019-06-20",
  doc-delivery-number = "CL1QK",
  eissn =        "1556-6064",
  fjournal =     "IEEE Computer Architecture Letters",
  journal-iso =  "IEEE Comput. Archit. Lett.",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=10208",
  keywords =     "general-purpose graphics processors; heterogeneous
                 (hybrid) systems; Modeling techniques; simulators",
  number-of-cited-references = "9",
  research-areas = "Computer Science",
  times-cited =  "62",
  unique-id =    "Power:2015:GGH",
  web-of-science-categories = "Computer Science, Hardware \&
                 Architecture",
}
@Article{Reano:2015:IUE,
  author =       "Carlos Rea{\~n}o and Federico Silla and Adri{\'a}n
                 Castell{\'o} and Antonio J. Pe{\~n}a and Rafael Mayo
                 and Enrique S. Quintana-Ort{\'\i} and Jos{\'e} Duato",
  title =        "Improving the user experience of the {rCUDA} remote
                 {GPU} virtualization framework",
  journal =      j-CCPE,
  volume =       "27",
  number =       "14",
  pages =        "3746--3770",
  day =          "25",
  month =        sep,
  year =         "2015",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3409",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Sep 28 09:32:54 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "10 Oct 2014",
}
@Article{Rico-Gallego:2015:ILM,
  author =       "Juan-Antonio Rico-Gallego and Juan-Carlos
                 D{\'\i}az-Mart{\'\i}n",
  title =        "{$ \tau $-Lop}: Modeling performance of shared memory
                 {MPI}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "46",
  number =       "??",
  pages =        "14--31",
  month =        jul,
  year =         "2015",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Wed Jun 17 11:37:27 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819115000447",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}
@Article{Rodriguez:2015:OPI,
  author =       "Marcos Rodr{\'\i}guez and Fernando Blesa and Roberto
                 Barrio",
  title =        "{OpenCL} parallel integration of ordinary differential
                 equations: Applications in computational dynamics",
  journal =      j-COMP-PHYS-COMM,
  volume =       "192",
  number =       "??",
  pages =        "228--236",
  month =        jul,
  year =         "2015",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Tue Apr 21 11:56:04 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465515000703",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Sack:2015:CAM,
  author =       "Paul Sack and William Gropp",
  title =        "Collective Algorithms for Multiported Torus Networks",
  journal =      j-TOPC,
  volume =       "1",
  number =       "2",
  pages =        "12:1--12:??",
  month =        jan,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2686882",
  ISSN =         "2329-4949 (print), 2329-4957 (electronic)",
  ISSN-L =       "2329-4949",
  bibdate =      "Wed Feb 18 16:46:00 MST 2015",
  bibsource =    "http://topc.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/topc.bib",
  abstract =     "Modern supercomputers with torus networks allow each
                 node to simultaneously pass messages on all of its
                 links. However, most collective algorithms are designed
                 to only use one link at a time. In this work, we
                 present novel multiported algorithms for the scatter,
                 gather, all-gather, and reduce-scatter operations. Our
                 algorithms can be combined to create multiported
                 reduce, all-reduce, and broadcast algorithms. Several
                 of these algorithms involve a new technique where we
                 relax the MPI message-ordering constraints to achieve
                 high performance and restore the correct ordering using
                 an additional stage of redundant communication.
                 According to our models, on an $n$-dimensional torus,
                 our algorithms should allow for nearly a $ 2 n$-fold
                 improvement in communication performance compared to
                 known, single-ported torus algorithms. In practice, we
                 have achieved nearly $ 6 \times $ better performance on
                 a 32k-node 3-dimensional torus.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Parallel Computing",
  journal-URL =  "http://dl.acm.org/citation.cfm?id=2632163",
}
@Article{Saillard:2015:SDV,
  author =       "Emmanuelle Saillard and Patrick Carribault and Denis
                 Barthou",
  title =        "Static\slash dynamic validation of {MPI} collective
                 communications in multi-threaded context",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "8",
  pages =        "279--280",
  month =        aug,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2858788.2688548",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:42 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Scientific applications mainly rely on the MPI
                 parallel programming model to reach high performance on
                 supercomputers. The advent of manycore architectures
                 (larger number of cores and lower amount of memory per
                 core) leads to mix MPI with a thread-based model like
                 OpenMP. But integrating two different programming
                 models inside the same application can be tricky and
                 generate complex bugs. Thus, the correctness of hybrid
                 programs requires a special care regarding MPI calls
                 location. For example, identical MPI collective
                 operations cannot be performed by multiple
                 non-synchronized threads. To tackle this issue, this
                 paper proposes a static analysis and a reduced dynamic
                 instrumentation to detect bugs related to misuse of MPI
                 collective operations inside or outside threaded
                 regions. This work extends PARCOACH designed for
                 MPI-only applications and keeps the compatibility with
                 these algorithms. We validated our method on multiple
                 hybrid benchmarks and applications with a low
                 overhead.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '15 conference proceedings.",
}
@Article{Shterenlikht:2015:FC,
  author =       "Anton Shterenlikht and Lee Margetts and Luis Cebamanos
                 and David Henty",
  title =        "{Fortran 2008} coarrays",
  journal =      j-FORTRAN-FORUM,
  volume =       "34",
  number =       "1",
  pages =        "10--30",
  month =        apr,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2754942.2754944",
  ISSN =         "1061-7264 (print), 1931-1311 (electronic)",
  ISSN-L =       "1061-7264",
  bibdate =      "Mon Aug 10 06:22:12 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fortran-forum.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Coarrays are a Fortran 2008 standard feature intended
                 for SIMD type parallel programming. The runtime
                 environment starts a number of identical executable
                 images of the coarray program, on multiple processors,
                 which could be actual physical processors or threads.
                 Each image has a unique number and its private address
                 space. Ordinary variables are private to an image.
                 Coarray variables are available for read/write access
                 from any other image. Coarray communications are of
                 ``single sided'' type, i.e. a remote call from image A
                 to image B does not need to be accompanied by a
                 corresponding call in image B. This feature makes
                 coarray programming a lot simpler than MPI. The
                 standard provides synchronisation intrinsics to help
                 avoid race conditions or deadlocks. Any ordinary
                 variable can be made into a coarray --- scalars,
                 arrays, intrinsic or derived data types, pointers,
                 allocatables are all allowed. Coarrays can be declared
                 in, and passed to, procedures. Coarrays are thus very
                 flexible and can be used for a number of purposes. For
                 example a collection of coarrays from all or some
                 images can be thought of as a large single array. This
                 is precisely the inverse of the model partitioning
                 logic, typical in MPI programs. A coarray program can
                 exploit functional parallelism too, by delegating
                 distinct tasks to separate images or teams of images.
                 Coarray collectives are expected to become a part of
                 the next version of the Fortran standard. A major
                 unresolved problem of coarray programming is the lack
                 of standard parallel I/O facility in Fortran. In this
                 paper several simple complete coarray programs are
                 shown and compared to alternative parallel technologies
                 --- OpenMP, MPI and Fortran 2008 intrinsic ``do
                 concurrent''. Inter-image communication patterns and
                 data transfer are illustrated. An example of a
                 materials microstructure simulation coarray program
                 scaled up to 32k cores is shown. Problems with coarray
                 I/O at this scale are highlighted and addressed with
                 the use of MPI-I/O. A hybrid MPI/coarray programming is
                 discussed and illustrated with a finite
                 element/cellular automata (CAFE) multi-scale model.
                 The paper completes with a description of the new
                 coarray language features, expected in the 2015 Fortran
                 standard, and with a brief list of coarray resources.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Fortran Forum",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J286",
}
@Article{Sosonkina:2015:RAV,
  author =       "Masha Sosonkina and Layne T. Watson and Jian He",
  title =        "Remark on Algorithm 897: {VTDIRECT95}: Serial and
                 Parallel Codes for the Global Optimization Algorithm
                 {DIRECT}",
  journal =      j-TOMS,
  volume =       "41",
  number =       "3",
  pages =        "22:1--22:2",
  month =        jun,
  year =         "2015",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/2699459",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Wed Jun 3 17:59:32 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  note =         "See \cite{He:2009:AVS}.",
  abstract =     "The Fortran95 code VTDIRECT95, based on the original
                 MPI, has been modified to use MPI-2. An option for
                 VTDIRECT95 is to divide the feasible box into
                 subdomains, and concurrently apply the global direct
                 search algorithm DIRECT within each subdomain. When the
                 number of subdomains is greater than one, a bug causes
                 VTDIRECT95 to occasionally sample outside the given
                 feasible box, which is serious if the objective
                 function is not defined outside the given box. This bug
                 has been fixed, and the sample output files have been
                 updated to reflect the correction. For completeness,
                 the package VTDIRECT95 now contains both the MPI-1
                 (with the multiple subdomain bug fixed) and the MPI-2
                 versions of the code.",
  acknowledgement = ack-nhfb,
  articleno =    "22",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Spencer:2015:DLN,
  author =       "Matt Spencer and Jesse Eickholt and Jianlin Cheng",
  title =        "A deep learning network approach to ab initio protein
                 secondary structure prediction",
  journal =      j-TCBB,
  volume =       "12",
  number =       "1",
  pages =        "103--112",
  month =        jan,
  year =         "2015",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2014.2343960",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Fri Aug 28 05:40:09 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Ab initio protein secondary structure (SS) predictions
                 are utilized to generate tertiary structure
                 predictions, which are increasingly demanded due to the
                 rapid discovery of proteins. Although recent
                 developments have slightly exceeded previous methods of
                 SS prediction, accuracy has stagnated around 80 percent
                 and many wonder if prediction cannot be advanced beyond
                 this ceiling. Disciplines that have traditionally
                 employed neural networks are experimenting with novel
                 deep learning techniques in attempts to stimulate
                 progress. Since neural networks have historically
                 played an important role in SS prediction, we wanted to
                 determine whether deep learning could contribute to the
                 advancement of this field as well. We developed an SS
                 predictor that makes use of the position-specific
                 scoring matrix generated by PSI-BLAST and deep learning
                 network architectures, which we call DNSS. Graphical
                 processing units and CUDA software optimize the deep
                 network architecture and efficiently train the deep
                 networks. Optimal parameters for the training process
                 were determined, and a workflow comprising three
                 separately trained deep networks was constructed in
                 order to make refined predictions. This deep learning
                 network approach was used to predict SS for a fully
                 independent test dataset of 198 proteins, achieving a
                 Q3 accuracy of 80.7 percent and a Sov accuracy of 74.2
                 percent.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}
@Article{Spiechowicz:2015:GAM,
  author =       "J. Spiechowicz and M. Kostur and L. Machura",
  title =        "{GPU} accelerated {Monte Carlo} simulation of
                 {Brownian} motors dynamics with {CUDA}",
  journal =      j-COMP-PHYS-COMM,
  volume =       "191",
  number =       "??",
  pages =        "140--149",
  month =        jun,
  year =         "2015",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Fri Apr 24 18:44:55 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465515000417",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Steuwer:2015:GPP,
  author =       "Michel Steuwer and Christian Fensch and Sam Lindley
                 and Christophe Dubach",
  title =        "Generating performance portable code using rewrite
                 rules: from high-level functional expressions to
                 high-performance {OpenCL} code",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "9",
  pages =        "205--217",
  month =        sep,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2858949.2784754",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:43 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Computers have become increasingly complex with the
                 emergence of heterogeneous hardware combining multicore
                 CPUs and GPUs. These parallel systems exhibit
                 tremendous computational power at the cost of increased
                 programming effort resulting in a tension between
                 performance and code portability. Typically, code is
                 either tuned in a low-level imperative language using
                 hardware-specific optimizations to achieve maximum
                 performance or is written in a high-level, possibly
                 functional, language to achieve portability at the
                 expense of performance. We propose a novel approach
                 aiming to combine high-level programming, code
                 portability, and high-performance. Starting from a
                 high-level functional expression we apply a simple set
                 of rewrite rules to transform it into a low-level
                 functional representation, close to the OpenCL
                 programming model, from which OpenCL code is generated.
                 Our rewrite rules define a space of possible
                 implementations which we automatically explore to
                 generate hardware-specific OpenCL implementations. We
                 formalize our system with a core dependently-typed
                 lambda-calculus along with a denotational semantics
                 which we use to prove the correctness of the rewrite
                 rules. We test our design in practice by implementing a
                 compiler which generates high performance imperative
                 OpenCL code. Our experiments show that we can
                 automatically derive hardware-specific implementations
                 from simple functional high-level algorithmic
                 expressions offering performance on a par with highly
                 tuned code for multicore CPUs and GPUs written by
                 experts.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ICFP '15 conference proceedings.",
}
@Article{Takizawa:2015:ODT,
  author =       "Hiroyuki Takizawa and Shoichi Hirasawa and Makoto
                 Sugawara and Isaac Gelado and Hiroaki Kobayashi and
                 Wen-mei W. Hwu",
  title =        "Optimized Data Transfers Based on the {OpenCL} Event
                 Management Mechanism",
  journal =      j-SCI-PROG,
  volume =       "2015",
  number =       "??",
  pages =        "576498:1--576498:16",
  month =        "????",
  year =         "2015",
  CODEN =        "SCIPEV",
  DOI =          "https://doi.org/10.1155/2015/576498",
  ISSN =         "1058-9244 (print), 1875-919X (electronic)",
  ISSN-L =       "1058-9244",
  bibdate =      "Tue Sep 20 07:53:44 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sciprogram.bib",
  URL =          "https://www.hindawi.com/journals/sp/2015/576498/",
  acknowledgement = ack-nhfb,
  fjournal =     "Scientific Programming",
  journal-URL =  "https://www.hindawi.com/journals/sp/",
  journalabr =   "Sci. Prog",
}
@Article{Tennyson:2015:MOI,
  author =       "P. Gerald Tennyson and G. M. Karthik and G.
                 Phanikumar",
  title =        "{MPI + OpenCL} implementation of a phase-field method
                 incorporating {CALPHAD} description of {Gibbs} energies
                 on heterogeneous computing platforms",
  journal =      j-COMP-PHYS-COMM,
  volume =       "186",
  number =       "??",
  pages =        "48--64",
  month =        jan,
  year =         "2015",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2014.09.014",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Mon Nov 10 08:38:05 MST 2014",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465514003208",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Thebault:2015:SEI,
  author =       "Lo{\"\i}c Th{\'e}bault and Eric Petit and Quang Dinh",
  title =        "Scalable and efficient implementation of {$3$D}
                 unstructured meshes computation: a case study on matrix
                 assembly",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "8",
  pages =        "120--129",
  month =        aug,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2858788.2688517",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:42 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Exposing massive parallelism on 3D unstructured meshes
                 computation with efficient load balancing and minimal
                 synchronizations is challenging. Current approaches
                 relying on domain decomposition and mesh coloring
                 struggle to scale with the increasing number of cores
                 per nodes, especially with new many-core processors. In
                 this paper, we propose an hybrid approach using domain
                 decomposition to exploit distributed memory
                 parallelism, Divide-and-Conquer, D{\&}C, to exploit
                 shared memory parallelism and improve locality, and
                 mesh coloring at core level to exploit vectors. It
                 illustrates a new trade-off for many-cores between
                 structuredness, memory locality, and vectorization. We
                 evaluate our approach on the finite element matrix
                 assembly of an industrial fluid dynamic code developed
                 by Dassault Aviation. We compare our D{\&}C approach to
                 domain decomposition and to mesh coloring. D{\&}C
                 achieves a high parallel efficiency, a good data
                 locality as well as an improved bandwidth usage. It
                 competes on current nodes with the optimized pure MPI
                 version with a minimum 10\% speed-up. D{\&}C shows an
                 impressive 319x strong scaling on 512 cores (32 nodes)
                 with only 2000 vertices per core. Finally, the Intel
                 Xeon Phi version has a performance similar to 10 Intel
                 E5-2665 Xeon Sandy Bridge cores and 95\% parallel
                 efficiency on the 60 physical cores. Running on 4 Xeon
                 Phi (240 cores), D{\&}C has 92\% efficiency on the
                 physical cores and performance similar to 33 Intel
                 E5-2665 Xeon Sandy Bridge cores.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '15 conference proceedings.",
}
@Article{Thompson:2015:PCI,
  author =       "Elizabeth Thompson and Nathan Clem and David A.
                 Peter",
  title =        "Parallel {CUDA} implementation of conflict detection
                 for application to airspace deconfliction",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "71",
  number =       "10",
  pages =        "3787--3810",
  month =        oct,
  year =         "2015",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-015-1467-z",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Tue Sep 29 10:07:24 MDT 2015",
  bibsource =    "http://link.springer.com/journal/11227/71/10;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s11227-015-1467-z",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Vapirev:2015:IRC,
  author =       "A. Vapirev and J. Deca and G. Lapenta and S. Markidis
                 and I. Hur and J.-L. Cambier",
  title =        "Initial results on computational performance of
                 {Intel} many integrated core, {Sandy Bridge}, and
                 graphical processing unit architectures: implementation
                 of a {$1$D C++\slash OpenMP} electrostatic
                 particle-in-cell code",
  journal =      j-CCPE,
  volume =       "27",
  number =       "3",
  pages =        "581--593",
  day =          "10",
  month =        mar,
  year =         "2015",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3248",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Jul 25 19:54:06 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "6 Mar 2014",
}
@Article{Verschelde:2015:PHC,
  author =       "Jan Verschelde and Xiangcheng Yu",
  title =        "Polynomial homotopy continuation on {GPUs}",
  journal =      j-ACM-COMM-COMP-ALGEBRA,
  volume =       "49",
  number =       "4",
  pages =        "130--133",
  month =        dec,
  year =         "2015",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2893803.2893810",
  ISSN =         "1932-2232 (print), 1932-2240 (electronic)",
  ISSN-L =       "1932-2232",
  bibdate =      "Wed Feb 17 16:05:57 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/python.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigsam.bib",
  abstract =     "The purpose of the software presentation is to
                 announce a library to track many solution paths defined
                 by a polynomial homotopy on a Graphics Processing Unit
                 (GPU). Developed on NVIDIA graphics cards with CUDA
                 SDKs, our code is released under the GNU GPL license.
                 Via the C interface to PHCpack, we can call our GPU
                 library from Python.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Communications in Computer Algebra",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1000",
}
@Article{Wang:2015:AST,
  author =       "Chun-Kun Wang and Peng-Sheng Chen",
  title =        "Automatic scoping of task clauses for the {OpenMP}
                 tasking model",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "71",
  number =       "3",
  pages =        "808--823",
  month =        mar,
  year =         "2015",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-014-1326-3",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Aug 8 12:23:09 MDT 2015",
  bibsource =    "http://link.springer.com/journal/11227/71/3;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s11227-014-1326-3",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Wickerson:2015:RSP,
  author =       "John Wickerson and Mark Batty and Bradford M. Beckmann
                 and Alastair F. Donaldson",
  title =        "Remote-scope promotion: clarified, rectified, and
                 verified",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "10",
  pages =        "731--747",
  month =        oct,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2858965.2814283",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:43 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Modern accelerator programming frameworks, such as
                 OpenCL, organise threads into work-groups. Remote-scope
                 promotion (RSP) is a language extension recently
                 proposed by AMD researchers that is designed to enable
                 applications, for the first time, both to optimise for
                 the common case of intra-work-group communication
                 (using memory scopes to provide consistency only within
                 a work-group) and to allow occasional inter-work-group
                 communication (as required, for instance, to support
                 the popular load-balancing idiom of work stealing). We
                 present the first formal, axiomatic memory model of
                 OpenCL extended with RSP. We have extended the Herd
                 memory model simulator with support for OpenCL kernels
                 that exploit RSP, and used it to discover bugs in
                 several litmus tests and a work-stealing queue, that
                 have been used previously in the study of RSP. We have
                 also formalised the proposed GPU implementation of RSP.
                 The formalisation process allowed us to identify bugs
                 in the description of RSP that could result in
                 well-synchronised programs experiencing memory
                 inconsistencies. We present and prove sound a new
                 implementation of RSP that incorporates bug fixes and
                 requires less non-standard hardware than the original
                 implementation. This work, a collaboration between
                 academia and industry, clearly demonstrates how, when
                 designing hardware support for a new concurrent
                 language feature, the early application of formal tools
                 and techniques can help to prevent errors, such as
                 those we have found, from making it into silicon.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "OOPSLA '15 conference proceedings.",
}
@Article{Yazdanpanah:2015:PHR,
  author =       "Fahimeh Yazdanpanah and Carlos {\'A}lvarez and Daniel
                 Jim{\'e}nez-Gonz{\'a}lez and Rosa M. Badia and Mateo
                 Valero",
  title =        "{Picos}: a hardware runtime architecture support for
                 {OmpSs}",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "53",
  number =       "??",
  pages =        "130--139",
  month =        dec,
  year =         "2015",
  CODEN =        "FGSEVI",
  DOI =          "https://doi.org/10.1016/j.future.2014.12.010",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Wed Aug 12 13:56:06 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167739X14002702",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X/",
  keywords =     "OpenMP",
}
@Article{You:2015:VFO,
  author =       "Yi-Ping You and Hen-Jung Wu and Yeh-Ning Tsai and
                 Yen-Ting Chao",
  title =        "{VirtCL}: a framework for {OpenCL} device abstraction
                 and management",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "8",
  pages =        "161--172",
  month =        aug,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2858788.2688505",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:42 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "The interest in using multiple graphics processing
                 units (GPUs) to accelerate applications has increased
                 in recent years. However, the existing heterogeneous
                 programming models (e.g., OpenCL) abstract details of
                 GPU devices at the per-device level and require
                 programmers to explicitly schedule their kernel tasks
                 on a system equipped with multiple GPU devices.
                 Unfortunately, multiple applications running on a
                 multi-GPU system may compete for some of the GPU
                 devices while leaving other GPU devices unused.
                 Moreover, the distributed memory model defined in
                 OpenCL, where each device has its own memory space,
                 increases the complexity of managing the memory among
                 multiple GPU devices. In this article we propose a
                 framework (called VirtCL) that reduces the programming
                 burden by acting as a layer between the programmer and
                 the native OpenCL run-time system for abstracting
                 multiple devices into a single virtual device and for
                 scheduling computations and communications among the
                 multiple devices. VirtCL comprises two main components:
                 (1) a front-end library, which exposes primary OpenCL
                 APIs and the virtual device, and (2) a back-end
                 run-time system (called CLDaemon) for scheduling and
                 dispatching kernel tasks based on a history-based
                 scheduler. The front-end library forwards computation
                 requests to the back-end CLDaemon, which then schedules
                 and dispatches the requests. We also propose a
                 history-based scheduler that is able to schedule kernel
                 tasks in a contention- and communication-aware manner.
                 Experiments demonstrated that the VirtCL framework
                 introduced a small overhead (mean of 6\%) but
                 outperformed the native OpenCL run-time system for most
                 benchmarks in the Rodinia benchmark suite, which was
                 due to the abstraction layer eliminating the
                 time-consuming initialization of OpenCL contexts. We
                 also evaluated different scheduling policies in VirtCL
                 with a real-world application (clsurf) and various
                 synthetic workload traces. The results indicated that
                 the VirtCL framework provides scalability for multiple
                 kernel tasks running on multi-GPU systems.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '15 conference proceedings.",
}
@Article{Younge:2015:SHP,
  author =       "Andrew J. Younge and John Paul Walters and Stephen P.
                 Crago and Geoffrey C. Fox",
  title =        "Supporting High Performance Molecular Dynamics in
                 Virtualized Clusters using {IOMMU}, {SR-IOV}, and
                 {GPUDirect}",
  journal =      j-SIGPLAN,
  volume =       "50",
  number =       "7",
  pages =        "31--38",
  month =        jul,
  year =         "2015",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2817817.2731194",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Tue Feb 16 12:01:42 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  abstract =     "Cloud Infrastructure-as-a-Service paradigms have
                 recently shown their utility for a vast array of
                 computational problems, ranging from advanced web
                 service architectures to high throughput computing.
                 However, many scientific computing applications have
                 been slow to adapt to virtualized cloud frameworks.
                 This is due to performance impacts of virtualization
                 technologies, coupled with the lack of advanced
                 hardware support necessary for running many high
                 performance scientific applications at scale. By using
                 KVM virtual machines that leverage both Nvidia GPUs and
                 InfiniBand, we show that molecular dynamics simulations
                 with LAMMPS and HOOMD run at near-native speeds. This
                 experiment also illustrates how virtualized
                 environments can support the latest parallel computing
                 paradigms, including both MPI+CUDA and new GPUDirect
                 RDMA functionality. Specific findings show initial
                 promise in scaling of such applications to larger
                 production deployments targeting large scale
                 computational workloads.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "VEE '15 conference proceedings.",
}
@Article{Zarrabi:2015:GSA,
  author =       "Amirreza Zarrabi and Khairulmizam Samsudin and Ettikan
                 K. Karuppiah",
  title =        "Gravitational search algorithm using {CUDA}: a case
                 study in high-performance metaheuristics",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "71",
  number =       "4",
  pages =        "1277--1296",
  month =        apr,
  year =         "2015",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-014-1360-1",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Aug 8 12:23:10 MDT 2015",
  bibsource =    "http://link.springer.com/journal/11227/71/4;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s11227-014-1360-1",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Zhu:2015:PIM,
  author =       "Xiangyuan Zhu and Kenli Li and Ahmad Salah and Lin Shi
                 and Keqin Li",
  title =        "Parallel implementation of {MAFFT} on {CUDA}-enabled
                 graphics hardware",
  journal =      j-TCBB,
  volume =       "12",
  number =       "1",
  pages =        "205--218",
  month =        jan,
  year =         "2015",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2014.2351801",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Fri Aug 28 05:40:09 MDT 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Multiple sequence alignment (MSA) constitutes an
                 extremely powerful tool for many biological
                 applications including phylogenetic tree estimation,
                 secondary structure prediction, and critical residue
                 identification. However, aligning large biological
                 sequences with popular tools such as MAFFT requires
                 long runtimes on sequential architectures. Due to the
                 ever increasing sizes of sequence databases, there is
                 increasing demand to accelerate this task. In this
                 paper, we demonstrate how graphic processing units
                 (GPUs), powered by the compute unified device
                 architecture (CUDA), can be used as an efficient
                 computational platform to accelerate the MAFFT
                 algorithm. To fully exploit the GPU's capabilities for
                 accelerating MAFFT, we have optimized the sequence data
                 organization to eliminate the bandwidth bottleneck of
                 memory access, designed a memory allocation and reuse
                 strategy to make full use of limited memory of GPUs,
                 proposed a new modified-run-length encoding (MRLE)
                 scheme to reduce memory consumption, and used
                 high-performance shared memory to speed up I/O
                 operations. Our implementation tested in three NVIDIA
                 GPUs achieves speedup up to 11.28 on a Tesla K20m GPU
                 compared to the sequential MAFFT 7.015.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}
@Article{Zhu:2015:PML,
  author =       "Leqing Zhu and Yadong Zhou and Daxing Zhang and Dadong
                 Wang and Huiyan Wang and Xun Wang",
  title =        "Parallel multi-level {2D-DWT} on {CUDA GPUs} and its
                 application in ring artifact removal",
  journal =      j-CCPE,
  volume =       "27",
  number =       "17",
  pages =        "5188--5202",
  day =          "10",
  month =        dec,
  year =         "2015",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3559",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Tue Feb 9 06:13:20 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "5 Jun 2015",
}
@Article{Abdelfattah:2016:KOL,
  author =       "Ahmad Abdelfattah and David Keyes and Hatem Ltaief",
  title =        "{KBLAS}: an Optimized Library for Dense Matrix-Vector
                 Multiplication on {GPU} Accelerators",
  journal =      j-TOMS,
  volume =       "42",
  number =       "3",
  pages =        "18:1--18:31",
  month =        may,
  year =         "2016",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/2818311",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Mon May 23 16:40:02 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  abstract =     "KBLAS is an open-source, high-performance library that
                 provides optimized kernels for a subset of Level 2 BLAS
                 functionalities on CUDA-enabled GPUs. Since performance
                 of dense matrix-vector multiplication is hindered by
                 the overhead of memory accesses, a double-buffering
                 optimization technique is employed to overlap data
                 motion with computation. After identifying a proper set
                 of tuning parameters, KBLAS efficiently runs on various
                 GPU architectures while avoiding code rewriting and
                 retaining compliance with the standard BLAS API.
                 Another optimization technique allows ensuring
                 coalesced memory access when dealing with submatrices,
                 especially for high-level dense linear algebra
                 algorithms. All KBLAS kernels have been leveraged to a
                 multi-GPU environment, which requires the introduction
                 of new APIs. Considering general matrices, KBLAS is
                 very competitive with existing state-of-the-art kernels
                 and provides a smoother performance across a wide range
                 of matrix dimensions. Considering symmetric and
                 Hermitian matrices, the KBLAS performance outperforms
                 existing state-of-the-art implementations on all matrix
                 sizes and achieves asymptotically up to 50\% and 60\%
                 speedup against the best competitor on single GPU and
                 multi-GPUs systems, respectively. Performance results
                 also validate our performance model. A subset of KBLAS
                 high-performance kernels have been integrated into
                 NVIDIA's standard BLAS implementation (cuBLAS) for
                 larger dissemination, starting from version 6.0.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Aji:2016:MAA,
  author =       "Ashwin M. Aji and Lokendra S. Panwar and Feng Ji and
                 Karthik Murthy and Milind Chabbi and Pavan Balaji and
                 Keith R. Bisset and James Dinan and Wu-chun Feng and
                 John Mellor-Crummey and Xiaosong Ma and Rajeev Thakur",
  title =        "{MPI-ACC}: Accelerator-Aware {MPI} for Scientific
                 Applications",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "27",
  number =       "5",
  pages =        "1401--1414",
  month =        may,
  year =         "2016",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2015.2446479",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Fri Apr 15 13:45:22 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.computer.org/csdl/trans/td/2016/05/07127020-abs.html",
  abstract-URL = "http://www.computer.org/csdl/trans/td/2016/05/07127020-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Aji:2016:MEA,
  author =       "Ashwin M. Aji and Antonio J. Pe{\~n}a and Pavan Balaji
                 and Wu-chun Feng",
  title =        "{MultiCL}: Enabling automatic scheduling for
                 task-parallel workloads in {OpenCL}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "58",
  number =       "??",
  pages =        "37--55",
  month =        oct,
  year =         "2016",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Tue Sep 27 08:00:38 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819116300357",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}
@Article{Aldea:2016:OES,
  author =       "Sergio Aldea and Alvaro Estebanez and Diego R. Llanos
                 and Arturo Gonzalez-Escribano",
  title =        "An {OpenMP} Extension that Supports Thread-Level
                 Speculation",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "27",
  number =       "1",
  pages =        "78--91",
  month =        jan,
  year =         "2016",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2015.2393870",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Tue Dec 15 09:28:10 MST 2015",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.computer.org/csdl/trans/td/2016/01/07014262-abs.html",
  abstract-URL = "http://www.computer.org/csdl/trans/td/2016/01/07014262-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{AlQuraishi:2016:CBP,
  author =       "Eman AlQuraishi and Eman AlDwaisan and Alaa AlSaqaa
                 and Imtiaz Ahmad",
  title =        "A {CUDA}-based parallel implementation of a test
                 vectors encoding algorithm in compression-based scan
                 designs",
  journal =      j-INT-J-PAR-EMER-DIST-SYS,
  volume =       "31",
  number =       "3",
  pages =        "280--293",
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1080/17445760.2015.1016516",
  ISSN =         "1744-5760 (print), 1744-5779 (electronic)",
  ISSN-L =       "1744-5760",
  bibdate =      "Mon Sep 12 09:19:42 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/intjparemerdistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.tandfonline.com/toc/gpaa20/31/3",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel, Emergent and
                 Distributed Systems: IJPEDS",
  journal-URL =  "http://www.tandfonline.com/loi/gpaa20",
  onlinedate =   "05 Mar 2015",
}
@Article{Andion:2016:LAA,
  author =       "Jos{\'e} M. Andi{\'o}n and Manuel Arenaz and
                 Fran{\c{c}}ois Bodin and Gabriel Rodr{\'\i}guez and
                 Juan Touri{\~n}o",
  title =        "Locality-Aware Automatic Parallelization for {GPGPU}
                 with {OpenHMPP} Directives",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "44",
  number =       "3",
  pages =        "620--643",
  month =        jun,
  year =         "2016",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-015-0362-9",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Mon May 30 09:25:55 MDT 2016",
  bibsource =    "http://link.springer.com/journal/10766/44/3;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s10766-015-0362-9",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Andujar:2016:OSF,
  author =       "Francisco J. And{\'u}jar and Juan A. Villar and
                 Francisco J. Alfaro and Jos{\'e} L. S{\'a}nchez and
                 Jesus Escudero-Sahuquillo",
  title =        "An open-source family of tools to reproduce
                 {MPI}-based workloads in interconnection network
                 simulators",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "72",
  number =       "12",
  pages =        "4601--4628",
  month =        dec,
  year =         "2016",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-016-1757-0",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jun 24 10:31:30 MDT 2017",
  bibsource =    "http://link.springer.com/journal/11227/72/12;
                 http://www.math.utah.edu/pub/tex/bib/gnu.bib;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s11227-016-1757-0",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Aubrey-Jones:2016:SMI,
  author =       "Tristan Aubrey-Jones and Bernd Fischer",
  title =        "Synthesizing {MPI} Implementations from Functional
                 Data-Parallel Programs",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "44",
  number =       "3",
  pages =        "552--573",
  month =        jun,
  year =         "2016",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-015-0359-4",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Mon May 30 09:25:55 MDT 2016",
  bibsource =    "http://link.springer.com/journal/10766/44/3;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s10766-015-0359-4",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Bader:2016:EMT,
  author =       "David A. Bader",
  title =        "Evolving {MPI+X} Toward Exascale",
  journal =      j-COMPUTER,
  volume =       "49",
  number =       "8",
  pages =        "10--10",
  month =        aug,
  year =         "2016",
  CODEN =        "CPTRB4",
  ISSN =         "0018-9162 (print), 1558-0814 (electronic)",
  ISSN-L =       "0018-9162",
  bibdate =      "Tue Aug 23 06:56:16 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/computer2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://csdl.computer.org/csdl/mags/co/2016/08/mco2016080010.html",
  abstract-URL = "http://csdl.computer.org/csdl/mags/co/2016/08/mco2016080010-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer",
  journal-URL =  "http://www.computer.org/portal/web/csdl/magazines/computer",
}
@Article{Batty:2016:OSA,
  author =       "Mark Batty and Alastair F. Donaldson and John
                 Wickerson",
  title =        "Overhauling {SC} atomics in {C11} and {OpenCL}",
  journal =      j-SIGPLAN,
  volume =       "51",
  number =       "1",
  pages =        "634--648",
  month =        jan,
  year =         "2016",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2914770.2837637",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Thu Jun 9 17:13:57 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Despite the conceptual simplicity of sequential
                 consistency (SC), the semantics of SC atomic operations
                 and fences in the C11 and OpenCL memory models is
                 subtle, leading to convoluted prose descriptions that
                 translate to complex axiomatic formalisations. We
                 conduct an overhaul of SC atomics in C11, reducing the
                 associated axioms in both number and complexity. A
                 consequence of our simplification is that the SC
                 operations in an execution no longer need to be totally
                 ordered. This relaxation enables, for the first time,
                 efficient and exhaustive simulation of litmus tests
                 that use SC atomics. We extend our improved C11 model
                 to obtain the first rigorous memory model formalisation
                 for OpenCL (which extends C11 with support for
                 heterogeneous many-core programming). In the OpenCL
                 setting, we refine the SC axioms still further to give
                 a sensible semantics to SC operations that employ a
                 `memory scope' to restrict their visibility to specific
                 threads. Our overhaul requires slight strengthenings of
                 both the C11 and the OpenCL memory models, causing some
                 behaviours to become disallowed. We argue that these
                 strengthenings are natural, and that all of the
                 formalised C11 and OpenCL compilation schemes of which
                 we are aware (Power and x86 CPUs for C11, AMD GPUs for
                 OpenCL) remain valid in our revised models. Using the
                 HERD memory model simulator, we show that our overhaul
                 leads to an exponential improvement in simulation time
                 for C11 litmus tests compared with the original model,
                 making {\em exhaustive} simulation competitive,
                 time-wise, with the {\em non-exhaustive} CDSChecker
                 tool.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "POPL '16 conference proceedings.",
}
@Article{Bolis:2016:APA,
  author =       "A. Bolis and C. D. Cantwell and D. Moxey and D. Serson
                 and S. J. Sherwin",
  title =        "An adaptable parallel algorithm for the direct
                 numerical simulation of incompressible turbulent flows
                 using a {Fourier} spectral\slash {$hp$} element method
                 and {MPI} virtual topologies",
  journal =      j-COMP-PHYS-COMM,
  volume =       "206",
  number =       "??",
  pages =        "17--25",
  month =        sep,
  year =         "2016",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Fri Jun 10 18:27:25 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S001046551630100X",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Calore:2016:PPA,
  author =       "Enrico Calore and Alessandro Gabbana and Jiri Kraus
                 and Sebastiano Fabio Schifano and Raffaele
                 Tripiccione",
  title =        "Performance and portability of accelerated lattice
                 {Boltzmann} applications with {OpenACC}",
  journal =      j-CCPE,
  volume =       "28",
  number =       "12",
  pages =        "3485--3502",
  day =          "25",
  month =        aug,
  year =         "2016",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3862",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Tue Sep 13 08:30:12 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Chang:2016:APC,
  author =       "Chih-Hung Chang and Chih-Wei Lu and Chao-Tung Yang and
                 Tzu-Chieh Chang",
  title =        "An approach of performance comparisons with {OpenMP}
                 and {CUDA} parallel programming on multicore systems",
  journal =      j-CCPE,
  volume =       "28",
  number =       "16",
  pages =        "4230--4245",
  month =        nov,
  year =         "2016",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3829",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Thu Nov 17 07:11:02 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Chang:2016:DLD,
  author =       "Li-Wen Chang and Hee-Seok Kim and Wen-mei W. Hwu",
  title =        "{DySel}: Lightweight Dynamic Selection for
                 Kernel-based Data-parallel Programming Model",
  journal =      j-SIGPLAN,
  volume =       "51",
  number =       "4",
  pages =        "667--680",
  month =        apr,
  year =         "2016",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2954679.2872373",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Thu Jun 9 17:13:59 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "The rising pressure for simultaneously improving
                 performance and reducing power is driving more
                 diversity into all aspects of computing devices. An
                 algorithm that is well-matched to the target hardware
                 can run multiple times faster and more energy
                 efficiently than one that is not. The problem is
                 complicated by the fact that a program's input also
                 affects the appropriate choice of algorithm. As a
                 result, software developers have been faced with the
                 challenge of determining the appropriate algorithm for
                 each potential combination of target device and data.
                 This paper presents DySel, a novel runtime system for
                 automating such determination for kernel-based data
                 parallel programming models such as OpenCL, CUDA,
                 OpenACC, and C++AMP. These programming models cover
                 many applications that demand high performance in
                 mobile, cloud and high-performance computing. DySel
                 systematically deploys candidate kernels on a small
                 portion of the actual data to determine which achieves
                 the best performance for the hardware-data combination.
                 The test-deployment, referred to as micro-profiling,
                 contributes to the final execution result and incurs
                 less than 8\% of overhead in the worst observed case
                 when compared to an oracle. We show four major use
                 cases where DySel provides significantly more
                 consistent performance without tedious effort from the
                 developer.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ASPLOS '16 conference proceedings.",
}
@Article{Cores:2016:ROM,
  author =       "Iv{\'a}n Cores and M{\'o}nica Rodr{\'\i}guez and
                 Patricia Gonz{\'a}lez and Mar{\'\i}a J. Mart{\'\i}n",
  title =        "Reducing the overhead of an {MPI} application-level
                 migration approach",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "54",
  number =       "??",
  pages =        "72--82",
  month =        may,
  year =         "2016",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Wed May 4 17:36:47 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819116000429",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}
@Article{Creech:2016:TSS,
  author =       "Timothy Creech and Rajeev Barua",
  title =        "Transparently Space Sharing a Multicore Among Multiple
                 Processes",
  journal =      j-TOPC,
  volume =       "3",
  number =       "3",
  pages =        "17:1--17:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3001910",
  ISSN =         "2329-4949 (print), 2329-4957 (electronic)",
  ISSN-L =       "2329-4949",
  bibdate =      "Mon Dec 26 17:40:41 MST 2016",
  bibsource =    "http://topc.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/topc.bib",
  abstract =     "As hardware becomes increasingly parallel and the
                 availability of scalable parallel software improves,
                 the problem of managing multiple multithreaded
                 applications (processes) becomes important. Malleable
                 processes, which can vary the number of threads used as
                 they run, enable sophisticated and flexible resource
                 management. Although many existing applications
                 parallelized for SMPs with parallel runtimes are in
                 fact already malleable, deployed runtime environments
                 provide no interface nor any strategy for intelligently
                 allocating hardware threads or even preventing
                 oversubscription. Prior research methods either depend
                 on profiling applications ahead of time to make good
                 decisions about allocations or do not account for
                 process efficiency at all, leading to poor performance.
                 None of these prior methods have been adapted widely in
                 practice. This article presents the Scheduling and
                 Allocation with Feedback (SCAF) system: a drop-in
                 runtime solution that supports existing malleable
                 applications in making intelligent allocation decisions
                 based on observed efficiency without any changes to
                 semantics, program modification, offline profiling, or
                 even recompilation. Our existing implementation can
                 control most unmodified OpenMP applications. Other
                 malleable threading libraries can also easily be
                 supported with small modifications without requiring
                 application modification or recompilation. In this
                 work, we present the SCAF daemon and a SCAF-aware port
                 of the GNU OpenMP runtime. We present a new technique
                 for estimating process efficiency purely at runtime
                 using available hardware counters and demonstrate its
                 effectiveness in aiding allocation decisions. We
                 evaluated SCAF using NAS NPB parallel benchmarks on
                 five commodity parallel platforms, enumerating
                 architectural features and their effects on our scheme.
                 We measured the benefit of SCAF in terms of sum of
                 speedups improvement (a common metric for
                 multiprogrammed environments) when running all
                 benchmark pairs concurrently compared to
                 equipartitioning---the best existing competing scheme in
                 the literature. We found that SCAF improves on
                 equipartitioning on four out of five machines, showing
                 a mean improvement factor in sum of speedups of 1.04 to
                 1.11x for benchmark pairs, depending on the machine,
                 and 1.09x on average. Since we are not aware of any
                 widely available tool for equipartitioning, we also
                 compare SCAF against multiprogramming using unmodified
                 OpenMP, which is the only environment available to end
                 users today. SCAF improves on the unmodified OpenMP
                 runtimes for all five machines, with a mean improvement
                 of 1.08 to 2.07x, depending on the machine, and 1.59x
                 on average.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Parallel Computing",
  journal-URL =  "http://dl.acm.org/citation.cfm?id=2632163",
}
@Article{Creel:2016:NJM,
  author =       "Michael Creel",
  title =        "A Note on {Julia} and {MPI}, with Code Examples",
  journal =      j-COMP-ECONOMICS,
  volume =       "48",
  number =       "3",
  pages =        "??--??",
  month =        oct,
  year =         "2016",
  CODEN =        "CNOMEL",
  DOI =          "https://doi.org/10.1007/s10614-015-9516-5",
  ISSN =         "0927-7099 (print), 1572-9974 (electronic)",
  ISSN-L =       "0927-7099",
  bibdate =      "Fri Apr 9 07:54:52 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/julia.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s10614-015-9516-5",
  acknowledgement = ack-nhfb,
  fjournal =     "Computational Economics",
  journal-URL =  "http://link.springer.com/journal/10614",
}
@Book{Czech:2016:IPC,
  author =       "Zbigniew J. Czech",
  title =        "Introduction to Parallel Computing",
  publisher =    pub-CAMBRIDGE,
  address =      pub-CAMBRIDGE:adr,
  pages =        "xvii + 354",
  year =         "2016",
  DOI =          "https://doi.org/10.1017/9781316795835",
  ISBN =         "1-107-17439-2 (hardcover), 1-316-79583-7 (e-book)",
  ISBN-13 =      "978-1-107-17439-9 (hardcover), 978-1-316-79583-5
                 (e-book)",
  LCCN =         "QA76.58 .C975 2016",
  bibdate =      "Fri Mar 31 11:22:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/super.bib;
                 z3950.loc.gov:7090/Voyager",
  abstract =     "The constantly increasing demand for more computing
                 power can seem impossible to keep up with. However,
                 multicore processors capable of performing computations
                 in parallel allow computers to tackle ever larger
                 problems in a wide variety of applications. This book
                 provides a comprehensive introduction to parallel
                 computing, discussing theoretical issues such as the
                 fundamentals of concurrent processes, models of
                 parallel and distributed computing, and metrics for
                 evaluating and comparing parallel algorithms, as well
                 as practical issues, including methods of designing and
                 implementing shared- and distributed-memory programs,
                 and standards for parallel program implementation, in
                 particular MPI and OpenMP interfaces. Each chapter
                 presents the basics in one place followed by advanced
                 topics, allowing novices and experienced practitioners
                 to quickly find what they need. A glossary and more
                 than 80 exercises with selected solutions aid
                 comprehension. The book is recommended as a text for
                 advanced undergraduate or graduate students and as a
                 reference for practitioners.",
  acknowledgement = ack-nhfb,
  subject =      "Parallel processing (Electronic computers)",
  tableofcontents = "Concurrent processes \\
                 Basic models of parallel computation \\
                 Elementary parallel algorithms \\
                 Designing parallel algorithms \\
                 Architectures of parallel computers \\
                 Message-passing programming \\
                 Shared-memory programming",
}
@Article{Dathathri:2016:CAL,
  author =       "Roshan Dathathri and Ravi Teja Mullapudi and Uday
                 Bondhugula",
  title =        "Compiling Affine Loop Nests for a Dynamic Scheduling
                 Runtime on Shared and Distributed Memory",
  journal =      j-TOPC,
  volume =       "3",
  number =       "2",
  pages =        "12:1--12:??",
  month =        aug,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2948975",
  ISSN =         "2329-4949 (print), 2329-4957 (electronic)",
  ISSN-L =       "2329-4949",
  bibdate =      "Fri Sep 23 15:24:52 MDT 2016",
  bibsource =    "http://topc.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/topc.bib",
  abstract =     "Current de-facto parallel programming models like
                 OpenMP and MPI make it difficult to extract task-level
                 dataflow parallelism as opposed to bulk-synchronous
                 parallelism. Task parallel approaches that use
                 point-to-point synchronization between dependent tasks
                 in conjunction with dynamic scheduling dataflow
                 runtimes are thus becoming attractive. Although good
                 performance can be extracted for both shared and
                 distributed memory using these approaches, there is
                 little compiler support for them. In this article, we
                 describe the design of compiler--runtime interaction to
                 automatically extract coarse-grained dataflow
                 parallelism in affine loop nests for both shared and
                 distributed-memory architectures. We use techniques
                 from the polyhedral compiler framework to extract tasks
                 and generate components of the runtime that are used to
                 dynamically schedule the generated tasks. The runtime
                 includes a distributed decentralized scheduler that
                 dynamically schedules tasks on a node. The schedulers
                 on different nodes cooperate with each other through
                 asynchronous point-to-point communication, and all of
                 this is achieved by code automatically generated by the
                 compiler. On a set of six representative affine loop
                 nest benchmarks, while running on 32 nodes with 8
                 threads each, our compiler-assisted runtime yields a
                 geometric mean speedup of $ 143.6 \times $ ($ 70.3
                 \times $ to $ 474.7 \times $) over the sequential
                 version and a geometric mean speedup of $ 1.64 \times $
                 ($ 1.04 \times $ to $ 2.42 \times $) over the
                 state-of-the-art automatic parallelization approach
                 that uses bulk synchronization. We also compare our
                 system with past work that addresses some of these
                 challenges on shared memory, and an emerging runtime
                 (Intel Concurrent Collections) that demands higher
                 programmer input and effort in parallelizing. To the
                 best of our knowledge, ours is also the first automatic
                 scheme that allows for dynamic scheduling of affine
                 loop nests on a cluster of multicores.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Parallel Computing",
  journal-URL =  "http://dl.acm.org/citation.cfm?id=2632163",
}
@Article{Deniz:2016:MGM,
  author =       "Etem Deniz and Alper Sen",
  title =        "{MINIME-GPU}: Multicore Benchmark Synthesizer for
                 {GPUs}",
  journal =      j-TACO,
  volume =       "12",
  number =       "4",
  pages =        "34:1--34:??",
  month =        jan,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2818693",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Tue Feb 16 15:36:38 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "We introduce MINIME-GPU, a novel automated benchmark
                 synthesis framework for graphics processing units
                 (GPUs) that serves to speed up architectural simulation
                 of modern GPU architectures. Our framework captures
                 important characteristics of original GPU applications
                 and generates synthetic GPU benchmarks using the Open
                 Computing Language (OpenCL) library from those
                 applications. To the best of our knowledge, this is the
                 first time synthetic OpenCL benchmarks for GPUs are
                 generated from existing applications. We use several
                 characteristics, including instruction throughput,
                 compute unit occupancy, and memory efficiency, to
                 compare the similarity of original applications and
                 their corresponding synthetic benchmarks. The
                 experimental results show that our synthetic benchmark
                 generation framework is capable of generating synthetic
                 benchmarks that have similar characteristics with the
                 original applications from which they are generated. On
                 average, the similarity (accuracy) is 96\% and the
                 speedup is 541 $ \times $ . In addition, our synthetic
                 benchmarks use the OpenCL library, which allows us to
                 obtain portable human readable benchmarks as opposed to
                 using assembly-level code, and they are faster and
                 smaller than the original applications from which they
                 are generated. We experimentally validated that our
                 synthetic benchmarks preserve the characteristics of
                 the original applications across different
                 architectures.",
  acknowledgement = ack-nhfb,
  articleno =    "34",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Dinan:2016:IEM,
  author =       "James Dinan and Pavan Balaji and Darius Buntinas and
                 David Goodell and William Gropp and Rajeev Thakur",
  title =        "An implementation and evaluation of the {MPI 3.0}
                 one-sided communication interface",
  journal =      j-CCPE,
  volume =       "28",
  number =       "17",
  pages =        "4385--4404",
  day =          "10",
  month =        dec,
  year =         "2016",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3758",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Thu Nov 17 07:11:03 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{DiPietro:2016:CLD,
  author =       "Roberto {Di Pietro} and Flavio Lombardi and Antonio
                 Villani",
  title =        "{CUDA} Leaks: a Detailed Hack for {CUDA} and a
                 (Partial) Fix",
  journal =      j-TECS,
  volume =       "15",
  number =       "1",
  pages =        "15:1--15:??",
  month =        feb,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2801153",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Wed Jun 8 09:43:30 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Graphics processing units (GPUs) are increasingly
                 common on desktops, servers, and embedded platforms. In
                 this article, we report on new security issues related
                 to CUDA, which is the most widespread platform for GPU
                 computing. In particular, details and proofs-of-concept
                 are provided about novel vulnerabilities to which CUDA
                 architectures are subject. We show how such
                 vulnerabilities can be exploited to cause severe
                 information leakage. As a case study, we experimentally
                 show how to exploit one of these vulnerabilities on a
                 GPU implementation of the AES encryption algorithm.
                 Finally, we also suggest software patches and
                 alternative approaches to tackle the presented
                 vulnerabilities.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J840",
}
@Article{Eckert:2016:HAL,
  author =       "C. H. J. Eckert and E. Zenker and M. Bussmann and D.
                 Albach",
  title =        "{HASEonGPU} --- an adaptive, load-balanced {MPI\slash
                 GPU}-code for calculating the amplified spontaneous
                 emission in high power laser media",
  journal =      j-COMP-PHYS-COMM,
  volume =       "207",
  number =       "??",
  pages =        "362--374",
  month =        oct,
  year =         "2016",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Tue Aug 30 18:08:51 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465516301436",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Fabeiro:2016:WPP,
  author =       "Jorge F. Fabeiro and Diego Andrade and Basilio B.
                 Fraguela",
  title =        "Writing a performance-portable matrix multiplication",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "52",
  number =       "??",
  pages =        "65--77",
  month =        feb,
  year =         "2016",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2015.12.005",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Fri Feb 12 18:56:20 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819115001611",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
  keywords =     "GPU; Heterogeneous Programming Library (HPL); Intel
                 Xeon Phi; MPI; OpenCL; OpenMP",
}
@Article{Gong:2016:NPG,
  author =       "Jing Gong and Stefano Markidis and Erwin Laure and
                 Matthew Otten and Paul Fischer and Misun Min",
  title =        "Nekbone performance on {GPUs} with {OpenACC} and
                 {CUDA} {Fortran} implementations",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "72",
  number =       "11",
  pages =        "4160--4180",
  month =        nov,
  year =         "2016",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-016-1744-5",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jun 24 10:31:30 MDT 2017",
  bibsource =    "http://link.springer.com/journal/11227/72/11;
                 http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Guang:2016:NMN,
  author =       "Suo Guang",
  title =        "{NR-MPI}: A Non-stop and Fault Resilient {MPI}
                 Supporting Programmer Defined Data Backup and Restore
                 for {E}-scale Super Computing Systems",
  journal =      j-SUPERFRI,
  volume =       "3",
  number =       "1",
  pages =        "4--21",
  month =        "????",
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2409-6008 (print), 2313-8734 (electronic)",
  bibdate =      "Sat Nov 11 07:15:27 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/superfri.bib",
  URL =          "http://superfri.org/superfri/article/view/89",
  acknowledgement = ack-nhfb,
  fjournal =     "Supercomputing Frontiers and Innovations",
  journal-URL =  "http://superfri.org/superfri/issue/archive",
}
@Article{Hamidouche:2016:CAO,
  author =       "Khaled Hamidouche and Akshay Venkatesh and Ammar Ahmad
                 Awan and Hari Subramoni and Ching-Hsiang Chu and
                 Dhabaleswar K. Panda",
  title =        "{CUDA}-Aware {OpenSHMEM}: Extensions and Designs for
                 High Performance {OpenSHMEM} on {GPU} Clusters",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "58",
  number =       "??",
  pages =        "27--36",
  month =        oct,
  year =         "2016",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Tue Sep 27 08:00:38 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819116300345",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}
@Article{Haque:2016:ACV,
  author =       "Syed Arefinul Haque and Salekul Islam and Md. Jahidul
                 Islam and Jean-Charles Gr{\'e}goire",
  title =        "An architecture for client virtualization: a case
                 study",
  journal =      j-COMP-NET-AMSTERDAM,
  volume =       "100",
  number =       "??",
  pages =        "75--89",
  day =          "8",
  month =        may,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "1389-1286 (print), 1872-7069 (electronic)",
  ISSN-L =       "1389-1286",
  bibdate =      "Thu May 12 08:55:09 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compnetamsterdam2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S1389128616300421",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Networks (Amsterdam, Netherlands: 1999)",
  journal-URL =  "http://www.sciencedirect.com/science/journal/13891286/",
}
@Article{Hariri:2016:PPA,
  author =       "F. Hariri and T. M. Tran and A. Jocksch and E. Lanti
                 and J. Progsch and P. Messmer and S. Brunner and C.
                 Gheller and L. Villard",
  title =        "A portable platform for accelerated {PIC} codes and
                 its application to {GPUs} using {OpenACC}",
  journal =      j-COMP-PHYS-COMM,
  volume =       "207",
  number =       "??",
  pages =        "69--82",
  month =        oct,
  year =         "2016",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Tue Aug 30 18:08:51 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465516301242",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Hu:2016:CLG,
  author =       "Liang Hu and Xilong Che and Si-Qing Zheng",
  title =        "A Closer Look at {GPGPU}",
  journal =      j-COMP-SURV,
  volume =       "48",
  number =       "4",
  pages =        "60:1--60:??",
  month =        may,
  year =         "2016",
  CODEN =        "CMSVAN",
  DOI =          "https://doi.org/10.1145/2873053",
  ISSN =         "0360-0300 (print), 1557-7341 (electronic)",
  ISSN-L =       "0360-0300",
  bibdate =      "Mon May 2 16:19:12 MDT 2016",
  bibsource =    "http://www.acm.org/pubs/contents/journals/surveys/;
                 http://www.math.utah.edu/pub/tex/bib/compsurv.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "The lack of detailed white box illustration leaves a
                 gap in the field of GPGPU (General-Purpose Computing on
                 the Graphic Processing Unit), thus hindering users and
                 researchers from exploring hardware potential while
                 improving application performance. This article bridges
                 the gap by demystifying the micro-architecture and
                 operating mechanism of GPGPU. We propose a descriptive
                 model that addresses key issues of most concerns,
                 including task organization, hardware structure,
                 scheduling mechanism, execution mechanism, and memory
                 access. We also validate the effectiveness of our model
                 by interpreting the software/hardware cooperation of
                 CUDA.",
  acknowledgement = ack-nhfb,
  articleno =    "60",
  fjournal =     "ACM Computing Surveys",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J204",
}
@Article{Hung:2016:EBP,
  author =       "Che-Lun Hung and Chun-Yuan Lin and Chia-Shin Ou and
                 Yuan-Hong Tseng and Po-Yen Hung and Ship-Peng Li and
                 Chun-Ting Fu",
  title =        "Efficient bit-parallel subcircuit extraction using
                 {CUDA}",
  journal =      j-CCPE,
  volume =       "28",
  number =       "16",
  pages =        "4326--4338",
  month =        nov,
  year =         "2016",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3732",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Thu Nov 17 07:11:02 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Hunold:2016:RMB,
  author =       "Sascha Hunold and Alexandra Carpen-Amarie",
  title =        "Reproducible {MPI} Benchmarking is Still Not as Easy
                 as You Think",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "27",
  number =       "12",
  pages =        "3617--3630",
  month =        dec,
  year =         "2016",
  CODEN =        "ITDSEO",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed Nov 16 18:43:09 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.computer.org/csdl/trans/td/2016/12/07426807-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Ibanez:2016:HMT,
  author =       "Dan Ibanez and Ian Dunn and Mark S. Shephard",
  title =        "Hybrid {MPI}-thread parallelization of adaptive mesh
                 operations",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "52",
  number =       "??",
  pages =        "133--143",
  month =        feb,
  year =         "2016",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Fri Feb 12 18:56:20 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819116000041",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}
@Article{Iida:2016:GET,
  author =       "Yuki Iida and Yusuke Fujii and Takuya Azumi and
                 Nobuhiko Nishio and Shinpei Kato",
  title =        "{GPUrpc}: Exploring Transparent Access to Remote
                 {GPUs}",
  journal =      j-TECS,
  volume =       "16",
  number =       "1",
  pages =        "17:1--17:??",
  month =        nov,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2950056",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Nov 3 16:48:38 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Graphics processing units (GPUs) are increasingly used
                 for high-performance computing. Programming frameworks
                 for general-purpose computing on GPUs (GPGPU), such as
                 CUDA and OpenCL, are also maturing. Driving this trend
                 is the recent proliferation of mobile devices such as
                 smartphones and wearable computers. These devices are
                 increasingly incorporating computationally intensive
                 applications that involve some form of environmental
                 recognition such as augmented reality (AR) or voice
                 recognition. However, devices with low computational
                 power cannot satisfy such demanding computing
                 requirements. The CPU load of these devices could be
                 reduced by offloading computation onto GPUs on the
                 cloud. This paper presents GPUrpc, a remote procedure
                 call (RPC) extension to Gdev, which is a rich set of
                 runtime libraries and device drivers for achieving
                 first-class GPU resource management. GPUrpc allows
                 developers to use CUDA for GPGPU development work.
                 Existing research uses RPCs based on the CUDA
                 application programming interfaces (APIs); hence, all
                 CUDA APIs require communication. To reduce
                 communication overhead, we use an RPC based on a
                 low-level API than CUDA API and reduced API that does
                 not require communication. Our evaluation conducted on
                 Linux and NVIDIA GPUs shows that the basic performance
                 of our prototype implementation is reliable in
                 comparison with the existing method. Evaluation using
                 the Rodinia benchmark suite designed for research in
                 heterogeneous parallel computing showed that GPUrpc is
                 effective for applications such as image processing and
                 data mining. GPUrpc also can improve power consumption
                 to approximately 1/6 that of CPU processing for
                 performing $ 512 \times 512 $ matrix multiplication.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J840",
  remark =       "Special issue on VIPES, special issue on ICESS2015 and
                 regular papers.",
}
@Article{Ilie:2016:AEC,
  author =       "Silvana Ilie and Arne Storjohann",
  title =        "Abstracts of the {2015 East Coast Computer Algebra
                 Day}",
  journal =      j-ACM-COMM-COMP-ALGEBRA,
  volume =       "50",
  number =       "1",
  pages =        "35--39",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2930964.2930969",
  ISSN =         "1932-2232 (print), 1932-2240 (electronic)",
  ISSN-L =       "1932-2232",
  bibdate =      "Wed Apr 27 16:14:51 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigsam.bib",
  abstract =     "In the past decade, the introduction of low-level
                 heterogeneous programming models, in particular CUDA,
                 has brought supercomputing to the level of the desktop
                 computer. However, these models bring notable
                 challenges, even to expert programmers. Indeed, fully
                 exploiting the power of hardware accelerators with
                 CUDA-like code often requires significant code
                 optimization effort. While this development can
                 certainly yield high performance, it is desirable for
                 some programmers to avoid the explicit management of
                 device initialization and data transfer between memory
                 levels. To this end, high-level models for accelerator
                 programming, like OpenMP and OpenACC, have become an
                 important research direction. With these models,
                 programmers only need to annotate their C/C++ code to
                 indicate which code portion is to be executed on the
                 device and how data maps between host and device.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Communications in Computer Algebra",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1000",
}
@Article{Islam:2016:EMT,
  author =       "Tanzima Islam and Kathryn Mohror and Martin Schulz",
  title =        "Exploring the {MPI} tool information interface:
                 features and capabilities",
  journal =      j-IJHPCA,
  volume =       "30",
  number =       "2",
  pages =        "212--222",
  year =         "2016",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342015600507",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Tue Apr 4 14:51:30 MDT 2017",
  bibsource =    "http://hpc.sagepub.com/;
                 http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://journals.sagepub.com/doi/full/10.1177/1094342015600507",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
  xxmonth =      may,
}
@Article{Kannan:2016:HPP,
  author =       "Ramakrishnan Kannan and Grey Ballard and Haesun Park",
  title =        "A high-performance parallel algorithm for nonnegative
                 matrix factorization",
  journal =      j-SIGPLAN,
  volume =       "51",
  number =       "8",
  pages =        "9:1--9:??",
  month =        aug,
  year =         "2016",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3016078.2851152",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:12 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Non-negative matrix factorization (NMF) is the problem
                 of determining two non-negative low rank factors W and
                 H, for the given input matrix A, such that $ A \approx
                 W H $. NMF is a useful tool for many applications in
                 different domains such as topic modeling in text
                 mining, background separation in video analysis, and
                 community detection in social networks. Despite its
                 popularity in the data mining community, there is a
                 lack of efficient distributed algorithms to solve the
                 problem for big data sets. We propose a
                 high-performance distributed-memory parallel algorithm
                 that computes the factorization by iteratively solving
                 alternating non-negative least squares (NLS)
                 subproblems for W and H. It maintains the data and
                 factor matrices in memory (distributed across
                 processors), uses MPI for interprocessor communication,
                 and, in the dense case, provably minimizes
                 communication costs (under mild assumptions). As
                 opposed to previous implementations, our algorithm is
                 also flexible: (1) it performs well for both dense and
                 sparse matrices, and (2) it allows the user to choose
                 any one of the multiple algorithms for solving the
                 updates to low rank factors W and H within the
                 alternating iterations. We demonstrate the scalability
                 of our algorithm and compare it with baseline
                 implementations, showing significant performance
                 improvements.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '16 conference proceedings.",
}
@Article{Kim:2016:DOF,
  author =       "Junghyun Kim and Gangwon Jo and Jaehoon Jung and
                 Jungwon Kim and Jaejin Lee",
  title =        "A distributed {OpenCL} framework using redundant
                 computation and data replication",
  journal =      j-SIGPLAN,
  volume =       "51",
  number =       "6",
  pages =        "553--569",
  month =        jun,
  year =         "2016",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2980983.2908094",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Sep 5 07:32:25 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Applications written solely in OpenCL or CUDA cannot
                 execute on a cluster as a whole. Most previous
                 approaches that extend these programming models to
                 clusters are based on a common idea: designating a
                 centralized host node and coordinating the other nodes
                 with the host for computation. However, the centralized
                 host node is a serious performance bottleneck when the
                 number of nodes is large. In this paper, we propose a
                 scalable and distributed OpenCL framework called
                 SnuCL-D for large-scale clusters. SnuCL-D's remote
                 device virtualization provides an OpenCL application
                 with an illusion that all compute devices in a cluster
                 are confined in a single node. To reduce the amount of
                 control-message and data communication between nodes,
                 SnuCL-D replicates the OpenCL host program execution
                 and data in each node. We also propose a new OpenCL
                 host API function and a queueing optimization technique
                 that significantly reduce the overhead incurred by the
                 previous centralized approaches. To show the
                 effectiveness of SnuCL-D, we evaluate SnuCL-D with a
                 microbenchmark and eleven benchmark applications on a
                 large-scale CPU cluster and a medium-scale GPU
                 cluster.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PLDI '16 conference proceedings.",
}
@article{Kobayashi:2016:HSV,
  author = {Ryohei Kobayashi and Tomohiro Misono and Kenji Kise},
  title = {A High-speed {Verilog} {HDL} Simulation Method using a
    Lightweight Translator},
  journal = j-COMP-ARCH-NEWS,
  volume = {44},
  number = {4},
  pages = {26--31},
  month = sep,
  year = {2016},
  coden = {CANED2},
  doi = {https://doi.org/10.1145/3039902.3039908},
  issn = {0163-5964 (print), 1943-5851 (electronic)},
  issn-l = {0163-5964},
  bibdate = {Thu Jan 12 18:43:44 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigarch.bib},
  abstract = {Designing with Hardware Description Languages (HDLs)
    is still the de facto standard way to develop
    FPGA-based custom computing systems, and RTL simulation
    is an important step in ensuring that the designed
    hardware behavior meets the design specification. In
    this paper, we propose a new high-speed Verilog HDL
    simulation method. It is based on two previously
    proposed techniques: ArchHDL and Pyverilog. ArchHDL is
    used as a simulation engine in the method because the
    RTL simulation provided by ArchHDL can be parallelized
    with OpenMP. We use Pyverilog to develop a code
    translator to convert Verilog HDL source code into
    ArchHDL code, and due to this, the translator can be
    realized and its implementation is lightweight. We
    compare the proposed method with Synopsys VCS, and the
    experimental results show that the RTL simulation
    behavior and speed are same as that of Synopsys VCS and
    up to 5.8x better respectively.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGARCH Computer Architecture News},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J89},
  remark = {HEART '16 conference proceedings.},
}
@Article{Koitka:2016:NGA,
author = "Sven Koitka and Christoph M. Friedrich",
title = "{nmfgpu4R}: {GPU}-Accelerated Computation of the
Non-Negative Matrix Factorization {(NMF)} Using {CUDA}
Capable Hardware",
journal = j-R-JOURNAL,
volume = "8",
number = "2",
pages = "382--392",
month = dec,
year = "2016",
DOI = "https://doi.org/10.32614/rj-2016-053",
ISSN = "2073-4859",
ISSN-L = "2073-4859",
bibdate = "Fri May 21 06:58:41 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/rjournal.bib",
URL = "https://journal.r-project.org/archive/2016/RJ-2016-053",
acknowledgement = ack-nhfb,
fjournal = "The R Journal",
journal-URL = "http://journal.r-project.org/",
xxnote = "The R Journal typesets the package name with a pkg
macro; this file's preamble does not define that macro,
so the name is given here as a plain case-protected
brace group.",
}
@article{Kolesnichenko:2016:CBG,
  author = {Alexey Kolesnichenko and Christopher M. Poskitt and
    Sebastian Nanz and Bertrand Meyer},
  title = {Contract-based general-purpose {GPU} programming},
  journal = j-SIGPLAN,
  volume = {51},
  number = {3},
  pages = {75--84},
  month = mar,
  year = {2016},
  coden = {SINODQ},
  doi = {https://doi.org/10.1145/2936314.2814216},
  issn = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  issn-l = {0362-1340},
  bibdate = {Thu Jun 9 17:13:58 MDT 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract = {Using GPUs as general-purpose processors has
    revolutionized parallel computing by offering, for a
    large and growing set of algorithms, massive
    data-parallelization on desktop machines. An obstacle
    to widespread adoption, however, is the difficulty of
    programming them and the low-level control of the
    hardware required to achieve good performance. This
    paper suggests a programming library, SafeGPU, that
    aims at striking a balance between programmer
    productivity and performance, by making GPU
    data-parallel operations accessible from within a
    classical object-oriented programming language. The
    solution is integrated with the design-by-contract
    approach, which increases confidence in functional
    program correctness by embedding executable program
    specifications into the program text. We show that our
    library leads to modular and maintainable code that is
    accessible to GPGPU non-experts, while providing
    performance that is comparable with hand-written CUDA
    code. Furthermore, runtime contract checking turns out
    to be feasible, as the contracts can be executed on the
    GPU.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGPLAN Notices},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark = {GPCE '15 conference proceedings.},
}
@article{Kulkarni:2016:HAP,
  author = {Kedar Kulkarni and Shreeya Badhe and Geetanjali
    Gadre},
  title = {{HCA} aware Parallel Communication Library: A
    feasibility study for offloading {MPI} requirements},
  journal = j-SUPERFRI,
  volume = {3},
  number = {3},
  pages = {56--60},
  month = {????},
  year = {2016},
  coden = {????},
  issn = {2409-6008 (print), 2313-8734 (electronic)},
  bibdate = {Sat Nov 11 07:15:27 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/superfri.bib},
  url = {http://superfri.org/superfri/article/view/109},
  acknowledgement = ack-nhfb,
  fjournal = {Supercomputing Frontiers and Innovations},
  journal-url = {http://superfri.org/superfri/issue/archive},
}
@article{Kutyniok:2016:SFD,
  author = {Gitta Kutyniok and Wang-Q Lim and Rafael Reisenhofer},
  title = {{ShearLab $3$D}: Faithful Digital Shearlet Transforms
    Based on Compactly Supported Shearlets},
  journal = j-TOMS,
  volume = {42},
  number = {1},
  pages = {5:1--5:42},
  month = feb,
  year = {2016},
  coden = {ACMSCU},
  doi = {https://doi.org/10.1145/2740960},
  issn = {0098-3500 (print), 1557-7295 (electronic)},
  issn-l = {0098-3500},
  bibdate = {Tue Mar 1 17:07:56 MST 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/toms.bib},
  abstract = {Wavelets and their associated transforms are highly
    efficient when approximating and analyzing
    one-dimensional signals. However, multivariate signals
    such as images or videos typically exhibit curvilinear
    singularities, which wavelets are provably deficient in
    sparsely approximating and also in analyzing in the
    sense of, for instance, detecting their direction.
    Shearlets are a directional representation system
    extending the wavelet framework, which overcomes those
    deficiencies. Similar to wavelets, shearlets allow a
    faithful implementation and fast associated transforms.
    In this article, we will introduce a comprehensive
    carefully documented software package coined ShearLab
    3D (www.ShearLab.org) and discuss its algorithmic
    details. This package provides MATLAB code for a novel
    faithful algorithmic realization of the 2D and 3D
    shearlet transform (and their inverses) associated with
    compactly supported universal shearlet systems
    incorporating the option of using CUDA. We will present
    extensive numerical experiments in 2D and 3D concerning
    denoising, inpainting, and feature extraction,
    comparing the performance of ShearLab 3D with similar
    transform-based algorithms such as curvelets,
    contourlets, or surfacelets. In the spirit of
    reproducible research, all scripts are accessible on
    www.ShearLab.org.},
  acknowledgement = ack-nhfb,
  articleno = {5},
  fjournal = {ACM Transactions on Mathematical Software (TOMS)},
  journal-url = {http://dl.acm.org/pub.cfm?id=J782},
}
@Article{Laguna:2016:EEU,
author = "Ignacio Laguna and David F. Richards and Todd Gamblin
and Martin Schulz and Bronis R. de Supinski and Kathryn
Mohror and Howard Pritchard",
title = "Evaluating and extending user-level fault tolerance in
{MPI} applications",
journal = j-IJHPCA,
volume = "30",
number = "3",
pages = "305--319",
month = aug,
year = "2016",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342015623623",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Apr 4 14:51:30 MDT 2017",
bibsource = "http://hpc.sagepub.com/;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://journals.sagepub.com/doi/full/10.1177/1094342015623623",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "https://journals.sagepub.com/home/hpc",
}
@article{Langr:2016:ASM,
  author = {Daniel Langr and Pavel Tvrdik and Ivan Simecek},
  title = {{AQsort}: Scalable Multi-Array In-Place Sorting with
    {OpenMP}},
  journal = j-SCPE,
  volume = {17},
  number = {4},
  pages = {369--391},
  month = {????},
  year = {2016},
  coden = {????},
  issn = {1895-1767},
  issn-l = {1895-1767},
  bibdate = {Mon Jan 7 06:46:48 MST 2019},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/scpe.bib},
  url = {https://www.scpe.org/index.php/scpe/article/view/1207},
  acknowledgement = ack-nhfb,
  fjournal = {Scalable Computing: Practice and Experience},
  journal-url = {http://www.scpe.org/},
}
@article{Lashgar:2016:ESM,
  author = {Ahmad Lashgar and Amirali Baniasadi},
  title = {Employing Software-Managed Caches in {OpenACC}:
    Opportunities and Benefits},
  journal = j-TOMPECS,
  volume = {1},
  number = {1},
  pages = {2:1--2:34},
  month = mar,
  year = {2016},
  coden = {????},
  doi = {https://doi.org/10.1145/2798724},
  issn = {2376-3639 (print), 2376-3647 (electronic)},
  issn-l = {2376-3639},
  bibdate = {Thu Jun 15 12:29:10 MDT 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/tompecs.bib},
  url = {http://dl.acm.org/citation.cfm?id=2798724},
  abstract = {The OpenACC programming model has been developed to
    simplify accelerator programming and improve
    development productivity. In this article, we
    investigate the main limitations faced by OpenACC in
    harnessing all capabilities of GPU-like accelerators.
    We build on our findings and discuss the opportunity to
    exploit a software-managed cache as (i) a fast
    communication medium and (ii) a cache for data reuse.
    To this end, we propose a new directive and
    communication model for OpenACC. Investigating several
    benchmarks, we show that the proposed directive can
    improve performance up to $ 2.54 \times $, and at the
    cost of minor programming effort.},
  acknowledgement = ack-nhfb,
  articleno = {2},
  fjournal = {ACM Transactions on Modeling and Performance
    Evaluation of Computing Systems (TOMPECS)},
  journal-url = {http://dl.acm.org/pub.cfm?id=J1525},
}
@article{Lin:2016:VDF,
  author = {Yu-Te Lin and Jenq-Kuen Lee},
  title = {Vector data flow analysis for {SIMD} optimizations on
    {OpenCL} programs},
  journal = j-CCPE,
  volume = {28},
  number = {5},
  pages = {1629--1654},
  day = {10},
  month = apr,
  year = {2016},
  coden = {CCPEBO},
  doi = {https://doi.org/10.1002/cpe.3714},
  issn = {1532-0626 (print), 1532-0634 (electronic)},
  issn-l = {1532-0626},
  bibdate = {Sun Apr 3 12:34:13 MDT 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Practice and Experience},
  journal-url = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate = {24 Oct 2015},
}
@article{Liu:2016:MBM,
  author = {Weifeng Liu and Michael Gerndt and Bin Gong},
  title = {Model-based {MPI-IO} tuning with {Periscope} tuning
    framework},
  journal = j-CCPE,
  volume = {28},
  number = {1},
  pages = {3--20},
  month = jan,
  year = {2016},
  coden = {CCPEBO},
  doi = {https://doi.org/10.1002/cpe.3603},
  issn = {1532-0626 (print), 1532-0634 (electronic)},
  issn-l = {1532-0626},
  bibdate = {Tue Feb 9 06:13:21 MST 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Practice and Experience},
  journal-url = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate = {14 Aug 2015},
}
@Article{Lobeiras:2016:DEI,
author = "Jacobo Lobeiras and Margarita Amor and Ramon Doallo",
title = "Designing Efficient Index-Digit Algorithms for {CUDA}
{GPU} Architectures",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "27",
number = "5",
pages = "1331--1343",
month = may,
year = "2016",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2015.2450718",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Fri Apr 15 13:45:22 MDT 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.computer.org/csdl/trans/td/2016/05/07138631-abs.html",
abstract-URL = "http://www.computer.org/csdl/trans/td/2016/05/07138631-abs.html",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@article{Loncar:2016:CPS,
  author = {Vladimir Loncar and Antun Balaz and Aleksandar
    Bogojevi{\'c} and Srdjan Skrbi{\'c} and Paulsamy
    Muruganandam and Sadhan K. Adhikari},
  title = {{CUDA} programs for solving the time-dependent dipolar
    {Gross--Pitaevskii} equation in an anisotropic trap},
  journal = j-COMP-PHYS-COMM,
  volume = {200},
  number = {??},
  pages = {406--410},
  month = mar,
  year = {2016},
  coden = {CPHCBZ},
  issn = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l = {0010-4655},
  bibdate = {Thu Jan 21 15:04:34 MST 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0010465515004361},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-url = {http://www.sciencedirect.com/science/journal/00104655/},
}
@article{Loncar:2016:OOM,
  author = {Vladimir Loncar and Luis E. Young-S. and Srdjan
    Skrbi{\'c} and Paulsamy Muruganandam and Sadhan K.
    Adhikari and Antun Balaz},
  title = {{OpenMP}, {OpenMP\slash MPI}, and {CUDA\slash MPI} {C}
    programs for solving the time-dependent dipolar
    {Gross--Pitaevskii} equation},
  journal = j-COMP-PHYS-COMM,
  volume = {209},
  number = {??},
  pages = {190--196},
  month = dec,
  year = {2016},
  coden = {CPHCBZ},
  issn = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l = {0010-4655},
  bibdate = {Tue Oct 18 17:55:23 MDT 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0010465516302272},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-url = {http://www.sciencedirect.com/science/journal/00104655/},
}
@article{Maleki:2016:HOT,
  author = {Sepideh Maleki and Annie Yang and Martin Burtscher},
  title = {Higher-order and tuple-based massively-parallel prefix
    sums},
  journal = j-SIGPLAN,
  volume = {51},
  number = {6},
  pages = {539--552},
  month = jun,
  year = {2016},
  coden = {SINODQ},
  doi = {https://doi.org/10.1145/2980983.2908089},
  issn = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  issn-l = {0362-1340},
  bibdate = {Mon Sep 5 07:32:25 MDT 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract = {Prefix sums are an important parallel primitive,
    especially in massively-parallel programs. This paper
    discusses two orthogonal generalizations thereof, which
    we call higher-order and tuple-based prefix sums.
    Moreover, it describes and evaluates SAM, a
    GPU-friendly algorithm for computing prefix sums and
    other scans that directly supports higher orders and
    tuple values. Its templated CUDA implementation unifies
    all of these computations in a single 100-statement
    kernel. SAM is communication-efficient in the sense
    that it minimizes main-memory accesses. When computing
    prefix sums of a million or more values, it outperforms
    Thrust and CUDPP on both a Titan X and a K40 GPU. On
    the Titan X, SAM reaches memory-copy speeds for large
    input sizes, which cannot be surpassed. SAM outperforms
    CUB, the currently fastest conventional prefix sum
    implementation, by up to a factor of 2.9 on
    eighth-order prefix sums and by up to a factor of 2.6
    on eight-tuple prefix sums.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGPLAN Notices},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark = {PLDI '16 conference proceedings.},
}
@article{Mallon:2016:MUB,
  author = {Dami{\'a}n A. Mall{\'o}n and Guillermo L. Taboada and
    Lars Koesterke},
  title = {{MPI} and {UPC} broadcast, scatter and gather
    algorithms in {Xeon Phi}},
  journal = j-CCPE,
  volume = {28},
  number = {8},
  pages = {2322--2340},
  day = {10},
  month = jun,
  year = {2016},
  coden = {CCPEBO},
  doi = {https://doi.org/10.1002/cpe.3552},
  issn = {1532-0626 (print), 1532-0634 (electronic)},
  issn-l = {1532-0626},
  bibdate = {Wed Jun 8 06:47:20 MDT 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Practice and Experience},
  journal-url = {http://www.interscience.wiley.com/jpages/1532-0626},
}
@article{Manca:2016:CQI,
  author = {Emanuele Manca and Andrea Manconi and Alessandro Orro
    and Giuliano Armano and Luciano Milanesi},
  title = {{CUDA-quicksort}: an improved {GPU}-based
    implementation of quicksort},
  journal = j-CCPE,
  volume = {28},
  number = {1},
  pages = {21--43},
  month = jan,
  year = {2016},
  coden = {CCPEBO},
  doi = {https://doi.org/10.1002/cpe.3611},
  issn = {1532-0626 (print), 1532-0634 (electronic)},
  issn-l = {1532-0626},
  bibdate = {Tue Feb 9 06:13:21 MST 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Practice and Experience},
  journal-url = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate = {12 Aug 2015},
}
@article{Marendic:2016:NMR,
  author = {P. Marendic and J. Lemeire and D. Vucinic and P.
    Schelkens},
  title = {A novel {MPI} reduction algorithm resilient to
    imbalances in process arrival times},
  journal = j-J-SUPERCOMPUTING,
  volume = {72},
  number = {5},
  pages = {1973--2013},
  month = may,
  year = {2016},
  coden = {JOSUED},
  doi = {https://doi.org/10.1007/s11227-016-1707-x},
  issn = {0920-8542 (print), 1573-0484 (electronic)},
  issn-l = {0920-8542},
  bibdate = {Mon May 30 09:17:38 MDT 2016},
  bibsource = {http://link.springer.com/journal/11227/72/5;
    http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://link.springer.com/article/10.1007/s11227-016-1707-x},
  acknowledgement = ack-nhfb,
  fjournal = {The Journal of Supercomputing},
  journal-url = {http://link.springer.com/journal/11227},
}
@Book{Matloff:2016:PCD,
author = "Norman S. Matloff",
title = "Parallel Computing for Data Science: with Examples in
{R}, {C++} and {CUDA}",
volume = "28",
publisher = pub-CRC,
address = pub-CRC:adr,
pages = "xxiii + 324",
year = "2016",
ISBN = "1-4665-8701-6 (hardcover)",
ISBN-13 = "978-1-4665-8701-4 (hardcover)",
LCCN = "QA76.642 M37 2016",
bibdate = "Sat Jun 27 09:13:41 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/s-plus.bib;
z3950.loc.gov:7090/Voyager",
series = "Chapman and Hall/CRC: The R Series",
URL = "http://www.tandf.net/books/details/9781466587014",
abstract = "\booktitle{Parallel Computing for Data Science: With
Examples in R, C++ and CUDA} is one of the first
parallel computing books to concentrate exclusively on
parallel data structures, algorithms, software tools,
and applications in data science. It includes examples
not only from the classic ``$n$ observations, $p$
variables'' matrix format but also from time series,
network graph models, and numerous other structures
common in data science. The examples illustrate the
range of issues encountered in parallel programming.
With the main focus on computation, the book shows how
to compute on three types of platforms.",
acknowledgement = ack-nhfb,
subject = "Parallel programming (Computer science); Electronic
data processing; R (Computer program language)",
tableofcontents = "Preface \\
Author's Biography \\
1: Introduction to Parallel Processing in R \\
2: ``Why Is My Program So Slow?'': Obstacles to Speed
\\
3: Principles of Parallel Loop Scheduling \\
4: The Shared-Memory Paradigm: A Gentle Introduction
via R \\
5: The Shared-Memory Paradigm: C Level \\
6: The Shared-Memory Paradigm: GPUs \\
7: Thrust and Rth \\
8: The Message Passing Paradigm \\
9: MapReduce Computation \\
10: Parallel Sorting and Merging \\
11: Parallel Prefix Scan \\
12: Parallel Matrix Operations \\
13: Inherently Statistical Approaches: Subset Methods
\\
Appendix A: Review of Matrix Algebra \\
Appendix B: R Quick Start \\
Appendix C: Introduction to C for R Programmers \\
Back Cover",
}
@article{Muddukrishna:2016:GGO,
  author = {Ananya Muddukrishna and Peter A. Jonsson and Artur
    Podobas and Mats Brorsson},
  title = {Grain graphs: {OpenMP} performance analysis made
    easy},
  journal = j-SIGPLAN,
  volume = {51},
  number = {8},
  pages = {28:1--28:??},
  month = aug,
  year = {2016},
  coden = {SINODQ},
  doi = {https://doi.org/10.1145/3016078.2851156},
  issn = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  issn-l = {0362-1340},
  bibdate = {Sat Sep 16 10:18:12 MDT 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract = {Average programmers struggle to solve performance
    problems in OpenMP programs with tasks and parallel
    for-loops. Existing performance analysis tools
    visualize OpenMP task performance from the runtime
    system's perspective where task execution is
    interleaved with other tasks in an unpredictable order.
    Problems with OpenMP parallel for-loops are similarly
    difficult to resolve since tools only visualize
    aggregate thread-level statistics such as load
    imbalance without zooming into a per-chunk granularity.
    The runtime system/threads oriented visualization
    provides poor support for understanding problems with
    task and chunk execution time, parallelism, and memory
    hierarchy utilization, forcing average programmers to
    rely on experts or use tedious trial-and-error tuning
    methods for performance. We present grain graphs, a new
    OpenMP performance analysis method that visualizes
    grains --- computation performed by a task or a
    parallel for-loop chunk instance --- and highlights
    problems such as low parallelism, work inflation and
    poor parallelization benefit at the grain level. We
    demonstrate that grain graphs can quickly reveal
    performance problems that are difficult to detect and
    characterize in fine detail using existing
    visualizations in standard OpenMP programs, simplifying
    OpenMP performance analysis. This enables average
    programmers to make portable optimizations for poor
    performing OpenMP programs, reducing pressure on
    experts and removing the need for tedious
    trial-and-error tuning.},
  acknowledgement = ack-nhfb,
  articleno = {28},
  fjournal = {ACM SIGPLAN Notices},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark = {PPoPP '16 conference proceedings.},
}
@Misc{Munshi:2016:OCS,
author = "Aaftab Munshi and Lee Howes and Bartosz Sochacki and
{Khronos OpenCL Working Group}",
title = "The {OpenCL} {C} Specification Version: 2.0 Document
Revision: 33",
howpublished = "Web document.",
pages = "205",
day = "13",
month = apr,
year = "2016",
bibdate = "Mon Apr 16 14:05:49 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/elefunt.bib;
http://www.math.utah.edu/pub/tex/bib/fparith.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.khronos.org/registry/OpenCL/specs/opencl-2.0-openclc.pdf",
acknowledgement = ack-nhfb,
remark = "Section 6.1.3.2 Math Functions, pages 74ff, defines a
function repertoire extended beyond that of ISO C,
including {\tt acospi}, {\tt asinpi}, {\tt atanpi},
{\tt atan2pi}, {\tt cospi}, {\tt fract}, {\tt lgamma\_r},
{\tt mad} (approximation to {\tt a * b + c}), {\tt minmag},
{\tt pown}, {\tt rootn}, {\tt sincos}, {\tt sinpi}, and
{\tt tanpi}.",
}
@Article{Nadal-Serrano:2016:PSC,
author = "Jose M. Nadal-Serrano and Marisa Lopez-Vallejo",
title = "A Performance Study of {CUDA UVM} versus Manual
Optimizations in a Real-World Setup: Application to a
{Monte Carlo} Wave-Particle Event-Based Interaction
Model",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "27",
number = "6",
pages = "1579--1588",
month = jun,
year = "2016",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2015.2463813",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Tue Jun 14 09:25:28 MDT 2016",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://csdl.computer.org/csdl/trans/td/2016/06/07175058-abs.html",
abstract-URL = "http://csdl.computer.org/csdl/trans/td/2016/06/07175058-abs.html",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@article{Naumenko:2016:ACT,
  author = {Mikhail A. Naumenko and Vyacheslav V. Samarin},
  title = {Application of {CUDA} technology to calculation of
    ground states of few-body nuclei by {Feynman}'s
    continual integrals method},
  journal = j-SUPERFRI,
  volume = {3},
  number = {2},
  pages = {80--95},
  month = {????},
  year = {2016},
  coden = {????},
  issn = {2409-6008 (print), 2313-8734 (electronic)},
  bibdate = {Sat Nov 11 07:15:27 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/superfri.bib},
  url = {http://superfri.org/superfri/article/view/102},
  acknowledgement = ack-nhfb,
  fjournal = {Supercomputing Frontiers and Innovations},
  journal-url = {http://superfri.org/superfri/issue/archive},
}
@article{Nogueira:2016:BBW,
  author = {David Nogueira and Pedro Tomas and Nuno Roma},
  title = {{BowMapCL}: {Burrows--Wheeler} Mapping on Multiple
    Heterogeneous Accelerators},
  journal = j-TCBB,
  volume = {13},
  number = {5},
  pages = {926--938},
  month = sep,
  year = {2016},
  coden = {ITCBCY},
  doi = {https://doi.org/10.1109/TCBB.2015.2495149},
  issn = {1545-5963 (print), 1557-9964 (electronic)},
  issn-l = {1545-5963},
  bibdate = {Fri Dec 30 16:19:30 MST 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/tcbb.bib},
  abstract = {The computational demand of exact-search procedures
    has pressed the exploitation of parallel processing
    accelerators to reduce the execution time of many
    applications. However, this often imposes strict
    restrictions in terms of the problem size and
    implementation efforts, mainly due to their possibly
    distinct architectures. To circumvent this limitation,
    a new exact-search alignment tool BowMapCL based on the
    Burrows--Wheeler Transform and FM-Index is presented.
    Contrasting to other alternatives, BowMapCL is based on
    a unified implementation using OpenCL, allowing the
    exploitation of multiple and possibly different devices
    e.g., NVIDIA, AMD/ATI, and Intel GPUs/APUs.
    Furthermore, to efficiently exploit such heterogeneous
    architectures, BowMapCL incorporates several techniques
    to promote its performance and scalability, including
    multiple buffering, work-queue task-distribution, and
    dynamic load-balancing, together with index
    partitioning, bit-encoding, and sampling. When compared
    with state-of-the-art tools, the attained results
    showed that BowMapCL using a single GPU is $ 2 \times $
    to $ 7.5 \times $ faster than mainstream multi-threaded
    CPU BWT-based aligners, like Bowtie, BWA, and SOAP2;
    and up to $ 4 \times $ faster than the best performing
    state-of-the-art GPU implementations namely, SOAP3 and
    HPG-BWT. When multiple and completely distinct devices
    are considered, BowMapCL efficiently scales the offered
    throughput, ensuring a convenient load-balance of the
    involved processing in the several distinct devices.},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE/ACM Transactions on Computational Biology and
    Bioinformatics},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J954},
}
@article{Oger:2016:DMM,
  author = {G. Oger and D. {Le Touz{\'e}} and D. Guibert and M. de
    Leffe and J. Biddiscombe and J. Soumagne and J.-G.
    Piccinali},
  title = {On distributed memory {MPI}-based parallelization of
    {SPH} codes in massive {HPC} context},
  journal = j-COMP-PHYS-COMM,
  volume = {200},
  number = {??},
  pages = {1--14},
  month = mar,
  year = {2016},
  coden = {CPHCBZ},
  issn = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l = {0010-4655},
  bibdate = {Thu Jan 21 15:04:34 MST 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0010465515003070},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-url = {http://www.sciencedirect.com/science/journal/00104655/},
}
@Article{Otten:2016:MOI,
author = "Matthew Otten and Jing Gong and Azamat Mametjanov and
Aaron Vose and John Levesque and Paul Fischer and Misun
Min",
title = "An {MPI\slash OpenACC} implementation of a high-order
electromagnetics solver with {GPUDirect}
communication",
journal = j-IJHPCA,
volume = "30",
number = "3",
pages = "320--334",
month = aug,
year = "2016",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Tue Apr 4 14:51:30 MDT 2017",
bibsource = "http://hpc.sagepub.com/;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "https://journals.sagepub.com/home/hpc",
}
@article{Pai:2016:CTO,
  author = {Sreepathi Pai and Keshav Pingali},
  title = {A compiler for throughput optimization of graph
    algorithms on {GPUs}},
  journal = j-SIGPLAN,
  volume = {51},
  number = {10},
  pages = {1--19},
  month = oct,
  year = {2016},
  coden = {SINODQ},
  doi = {https://doi.org/10.1145/3022671.2984015},
  issn = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  issn-l = {0362-1340},
  bibdate = {Sat Sep 16 10:18:13 MDT 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract = {Writing high-performance GPU implementations of graph
    algorithms can be challenging. In this paper, we argue
    that three optimizations called throughput
    optimizations are key to high-performance for this
    application class. These optimizations describe a large
    implementation space making it unrealistic for
    programmers to implement them by hand. To address this
    problem, we have implemented these optimizations in a
    compiler that produces CUDA code from an
    intermediate-level program representation called IrGL.
    Compared to state-of-the-art handwritten CUDA
    implementations of eight graph applications, code
    generated by the IrGL compiler is up to 5.95x times
    faster (median 1.4x) for five applications and never
    more than 30\% slower for the others. Throughput
    optimizations contribute an improvement up to 4.16x
    (median 1.4x) to the performance of unoptimized IrGL
    code.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGPLAN Notices},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark = {OOPSLA '16 conference proceedings.},
}
@article{Pang:2016:MKR,
  author = {Yeyong Pang and Shaojun Wang and Yu Peng and Xiyuan
    Peng and Nicholas J. Fraser and Philip H. W. Leong},
  title = {A Microcoded Kernel Recursive Least Squares Processor
    Using {FPGA} Technology},
  journal = j-TRETS,
  volume = {10},
  number = {1},
  pages = {5:1--5:??},
  month = dec,
  year = {2016},
  coden = {????},
  doi = {https://doi.org/10.1145/2950061},
  issn = {1936-7406 (print), 1936-7414 (electronic)},
  issn-l = {1936-7406},
  bibdate = {Mon Apr 3 11:34:09 MDT 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/trets.bib},
  abstract = {Kernel methods utilize linear methods in a nonlinear
    feature space and combine the advantages of both.
    Online kernel methods, such as kernel recursive least
    squares (KRLS) and kernel normalized least mean squares
    (KNLMS), perform nonlinear regression in a recursive
    manner, with similar computational requirements to
    linear techniques. In this article, an architecture for
    a microcoded kernel method accelerator is described,
    and high-performance implementations of sliding-window
    KRLS, fixed-budget KRLS, and KNLMS are presented. The
    architecture utilizes pipelining and vectorization for
    performance, and microcoding for reusability. The
    design can be scaled to allow tradeoffs between
    capacity, performance, and area. The design is compared
    with a central processing unit (CPU), digital signal
    processor (DSP), and Altera OpenCL implementations. In
    different configurations on an Altera Arria 10 device,
    our SW-KRLS implementation delivers floating-point
    throughput of approximately 16 GFLOPs, latency of 5.5 $
    \mu $ s, and energy consumption of $ 10^{- 4} $ J,
    these being improvements over a CPU by factors of 12,
    17, and 24, respectively.},
  acknowledgement = ack-nhfb,
  articleno = {5},
  fjournal = {ACM Transactions on Reconfigurable Technology and
    Systems (TRETS)},
  journal-url = {http://portal.acm.org/toc.cfm?id=J1151},
}
@Article{Peraza:2016:PGQ,
  author =       "Joshua Peraza and Ananta Tiwari and Michael Laurenzano
                 and Laura Carrington and Allan Snavely",
  title =        "{PMaC}'s green queue: a framework for selecting energy
                 optimal {DVFS} configurations in large scale {MPI}
                 applications",
  journal =      j-CCPE,
  volume =       "28",
  number =       "2",
  pages =        "211--231",
  month =        feb,
  year =         "2016",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3184",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Tue Feb 9 06:13:21 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "27 Dec 2013",
}
@Article{Pirk:2016:VVA,
  author =       "Holger Pirk and Oscar Moll and Matei Zaharia and Sam
                 Madden",
  title =        "{Voodoo} --- a vector algebra for portable database
                 performance on modern hardware",
  journal =      j-PROC-VLDB-ENDOWMENT,
  volume =       "9",
  number =       "14",
  pages =        "1707--1718",
  month =        oct,
  year =         "2016",
  CODEN =        "????",
  ISSN =         "2150-8097",
  bibdate =      "Wed Oct 12 10:14:56 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/vldbe.bib",
  abstract =     "In-memory databases require careful tuning and many
                 engineering tricks to achieve good performance. Such
                 database performance engineering is hard: a plethora of
                 data and hardware-dependent optimization techniques
                 form a design space that is difficult to navigate for a
                 skilled engineer --- even more so for a query compiler.
                 To facilitate performance-oriented design exploration
                 and query plan compilation, we present Voodoo, a
                 declarative intermediate algebra that abstracts the
                 detailed architectural properties of the hardware, such
                 as multi- or many-core architectures, caches and SIMD
                 registers, without losing the ability to generate
                 highly tuned code. Because it consists of a collection
                 of declarative, vector-oriented operations, Voodoo is
                 easier to reason about and tune than low-level C and
                 related hardware-focused extensions (Intrinsics,
                 OpenCL, CUDA, etc.). This enables our Voodoo compiler
                 to produce (OpenCL) code that rivals and even
                 outperforms the fastest state-of-the-art in memory
                 databases for both GPUs and CPUs. In addition, Voodoo
                 makes it possible to express techniques as diverse as
                 cache-conscious processing, predication and
                 vectorization (again on both GPUs and CPUs) with just a
                 few lines of code. Central to our approach is a novel
                 idea we termed control vectors, which allows a code
                 generating frontend to expose parallelism to the Voodoo
                 compiler in a abstract manner, enabling portable
                 performance across hardware platforms. We used Voodoo
                 to build an alternative backend for MonetDB, a popular
                 open-source in-memory database. Our backend allows
                 MonetDB to perform at the same level as highly tuned
                 in-memory databases, including HyPeR and Ocelot. We
                 also demonstrate Voodoo's usefulness when investigating
                 hardware conscious tuning techniques, assessing their
                 performance on different queries, devices and data.",
  acknowledgement = ack-nhfb,
  fjournal =     "Proceedings of the VLDB Endowment",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J1174",
}
@Article{Prabhakar:2016:GCH,
  author =       "Raghu Prabhakar and David Koeplinger and Kevin J.
                 Brown and HyoukJoong Lee and Christopher {De Sa} and
                 Christos Kozyrakis and Kunle Olukotun",
  title =        "Generating Configurable Hardware from Parallel
                 Patterns",
  journal =      j-SIGPLAN,
  volume =       "51",
  number =       "4",
  pages =        "651--665",
  month =        apr,
  year =         "2016",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2954679.2872415",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Thu Jun 9 17:13:59 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "In recent years the computing landscape has seen an
                 increasing shift towards specialized accelerators.
                 Field programmable gate arrays (FPGAs) are particularly
                 promising for the implementation of these accelerators,
                 as they offer significant performance and energy
                 improvements over CPUs for a wide class of applications
                 and are far more flexible than fixed-function ASICs.
                 However, FPGAs are difficult to program. Traditional
                 programming models for reconfigurable logic use
                 low-level hardware description languages like Verilog
                 and VHDL, which have none of the productivity features
                 of modern software languages but produce very efficient
                 designs, and low-level software languages like C and
                 OpenCL coupled with high-level synthesis (HLS) tools
                 that typically produce designs that are far less
                 efficient. Functional languages with parallel patterns
                 are a better fit for hardware generation because they
                 provide high-level abstractions to programmers with
                 little experience in hardware design and avoid many of
                 the problems faced when generating hardware from
                 imperative languages. In this paper, we identify two
                 important optimizations for using parallel patterns to
                 generate efficient hardware: tiling and metapipelining.
                 We present a general representation of tiled parallel
                 patterns, and provide rules for automatically tiling
                 patterns and generating metapipelines. We demonstrate
                 experimentally that these optimizations result in
                 speedups up to 39.4$ \times $ on a set of benchmarks
                 from the data analytics domain.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "ASPLOS '16 conference proceedings.",
}
@Article{Prades:2016:CAX,
  author =       "Javier Prades and Carlos Rea{\~n}o and Federico
                 Silla",
  title =        "{CUDA} acceleration for {Xen} virtual machines in
                 {InfiniBand} clusters with {rCUDA}",
  journal =      j-SIGPLAN,
  volume =       "51",
  number =       "8",
  pages =        "35:1--35:??",
  month =        aug,
  year =         "2016",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3016078.2851181",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:12 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Many data centers currently use virtual machines (VMs)
                 to achieve a more efficient usage of hardware
                 resources. However, current virtualization solutions,
                 such as Xen, do not easily provide graphics processing
                 unit (GPU) accelerators to applications running in the
                 virtualized domain with the flexibility usually
                 required in data centers (i.e., managing virtual GPU
                 instances and concurrently sharing them among several
                 VMs). Remote GPU virtualization frameworks such as the
                 rCUDA solution may address this problem. In this work
                 we analyze the use of the rCUDA framework to accelerate
                 scientific applications running inside Xen VMs. Results
                 show that the use of the rCUDA framework is a feasible
                 approach, featuring a very low overhead if an
                 InfiniBand fabric is already present in the cluster.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '16 conference proceedings.",
}
@Article{Rehman:2016:VMJ,
  author =       "Waqas Ur Rehman and Muhammad Sohaib Ayub and Junaid
                 Haroon Siddiqui",
  title =        "Verification of {MPI} {Java} programs using software
                 model checking",
  journal =      j-SIGPLAN,
  volume =       "51",
  number =       "8",
  pages =        "55:1--55:??",
  month =        aug,
  year =         "2016",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3016078.2851192",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:12 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/java2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Development of concurrent software requires the
                 programmer to be aware of non-determinism, data races,
                 and deadlocks. MPI (message passing interface) is a
                 popular standard for writing message oriented
                 distributed applications. Some messages in MPI systems
                 can be processed by one of the many machines and in
                 many possible orders. This non-determinism can affect
                 the result of an MPI application. The alternate results
                 may or may not be correct. To verify MPI applications,
                 we need to check all these possible orderings and use
                 an application specific oracle to decide if these
                 orderings give correct output. MPJ Express is an open
                 source Java implementation of the MPI standard. We
                 developed a Java based model of MPJ Express, where
                 processes are modeled as threads, and which can run
                 unmodified MPI Java programs on a single system. This
                 enabled us to adapt the Java PathFinder explicit state
                 software model checker (JPF) using a custom listener to
                 verify our model running real MPI Java programs. We
                 evaluated our approach using small examples where model
                 checking revealed message orders that would result in
                 incorrect system behavior.",
  acknowledgement = ack-nhfb,
  articleno =    "55",
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '16 conference proceedings.",
}
@Article{Rico-Gallego:2016:EIL,
  author =       "Juan-Antonio Rico-Gallego and Juan-Carlos
                 D{\'\i}az-Mart{\'\i}n and Alexey L. Lastovetsky",
  title =        "Extending {$ \tau $}-Lop to model concurrent {MPI}
                 communications in multicore clusters",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "61",
  number =       "??",
  pages =        "66--82",
  month =        aug,
  year =         "2016",
  CODEN =        "FGSEVI",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Wed Apr 27 09:38:59 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167739X16300346",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X/",
}
@Article{Sandes:2016:CIS,
  author =       "Sandes, Edans Flavius de Oliveira and Guillermo Miranda
                 and Xavier Martorell and Eduard Ayguade and George
                 Teodoro and Alba Cristina Magalhaes Melo",
  title =        "{CUDAlign 4.0}: Incremental Speculative Traceback for
                 Exact Chromosome-Wide Alignment in {GPU} Clusters",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "27",
  number =       "10",
  pages =        "2838--2850",
  month =        oct,
  year =         "2016",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2016.2515597",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Tue Sep 13 06:32:59 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.computer.org/csdl/trans/td/2016/10/07374729-abs.html",
  abstract-URL = "https://www.computer.org/csdl/trans/td/2016/10/07374729-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Sandes:2016:MMA,
  author =       "Edans F. De O. Sandes and Guillermo Miranda and Xavier
                 Martorell and Eduard Ayguade and George Teodoro and
                 Alba C. M. A. {De Melo}",
  title =        "{MASA}: a Multiplatform Architecture for Sequence
                 Aligners with Block Pruning",
  journal =      j-TOPC,
  volume =       "2",
  number =       "4",
  pages =        "28:1--28:??",
  month =        mar,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2858656",
  ISSN =         "2329-4949 (print), 2329-4957 (electronic)",
  ISSN-L =       "2329-4949",
  bibdate =      "Sat Mar 19 08:11:13 MDT 2016",
  bibsource =    "http://topc.acm.org/;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/topc.bib",
  abstract =     "Biological sequence alignment is a very popular
                 application in Bioinformatics, used routinely
                 worldwide. Many implementations of biological sequence
                 alignment algorithms have been proposed for multicores,
                 GPUs, FPGAs and CellBEs. These implementations are
                 platform-specific; porting them to other systems
                 requires considerable programming effort. This article
                 proposes and evaluates MASA, a flexible and
                 customizable software architecture that enables the
                 execution of biological sequence alignment applications
                 with three variants (local, global, and semiglobal) in
                 multiple hardware/software platforms with block
                 pruning, which is able to reduce significantly the
                 amount of data processed. To attain our flexibility
                 goals, we also propose a generic version of block
                 pruning and developed multiple parallelization
                 strategies as building blocks, including a new
                 asynchronous dataflow-based parallelization, which may
                 be combined to implement efficient aligners in
                 different platforms. We provide four MASA aligner
                 implementations for multicores (OmpSs and OpenMP), GPU
                 (CUDA), and Intel Phi (OpenMP), showing that MASA is
                 very flexible. The evaluation of our generic block
                 pruning strategy shows that it significantly
                 outperforms the previously proposed block pruning,
                 being able to prune up to 66.5\% of the cells when
                 using the new dataflow-based parallelization
                 strategy.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Parallel Computing",
  journal-URL =  "http://dl.acm.org/citation.cfm?id=2632163",
  remark =       "Special Issue on PPoPP'14 conference.",
}
@Article{Sataric:2016:HOM,
  author =       "Bogdan Satari{\'c} and Vladimir Slavni{\'c} and
                 Aleksandar Beli{\'c} and Antun Balaz and Paulsamy
                 Muruganandam and Sadhan K. Adhikari",
  title =        "Hybrid {OpenMP\slash MPI} programs for solving the
                 time-dependent {Gross--Pitaevskii} equation in a fully
                 anisotropic trap",
  journal =      j-COMP-PHYS-COMM,
  volume =       "200",
  number =       "??",
  pages =        "411--417",
  month =        mar,
  year =         "2016",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Thu Jan 21 15:04:34 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465515004440",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Schenck:2016:EPM,
  author =       "Wolfram Schenck and Salem {El Sayed} and Maciej
                 Foszczynski and Wilhelm Homberg and Dirk Pleiter",
  title =        "Evaluation and Performance Modeling of a Burst Buffer
                 Solution",
  journal =      j-OPER-SYS-REV,
  volume =       "50",
  number =       "3",
  pages =        "12--26",
  month =        dec,
  year =         "2016",
  CODEN =        "OSRED8",
  DOI =          "https://doi.org/10.1145/3041710.3041714",
  ISSN =         "0163-5980 (print), 1943-586X (electronic)",
  ISSN-L =       "0163-5980",
  bibdate =      "Thu Feb 9 10:38:58 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/opersysrev.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Hierarchical storage architectures are required to
                 meet both, capacity and bandwidth requirements for
                 future high-end storage architectures. In this paper we
                 present the results of an evaluation of an emerging
                 technology, DataDirect Networks' (DDN) Infinite Memory
                 Engine (IME). IME allows to realize a fast buffer in
                 front of a large capacity storage system. We collected
                 benchmarking data with IOR and with the HPC application
                 NEST. The IOR bandwidth results show how well network
                 bandwidth towards such fast buffer can be exploited
                 compared to the external storage system. The NEST
                 benchmarks clearly demonstrate that IME can reduce
                 I/O-induced load imbalance between MPI ranks to a
                 minimum while speeding up I/O as a whole by a
                 considerable factor. In addition to these direct
                 measurements, a performance model for NEST is
                 developed. In combination with a generic and abstract
                 burst buffer architecture, this model generates
                 predictions about appropriate burst buffer and I/O
                 parameters to achieve specific performance goals for
                 NEST on HPC clusters of varying size. Specifically, it
                 is investigated in which parameter range burst buffers
                 are able to counteract the widening performance gap
                 between compute and I/O.",
  acknowledgement = ack-nhfb,
  fjournal =     "Operating Systems Review",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J597",
}
@Article{Soldado:2016:ECM,
  author =       "F{\'a}bio Soldado and Fernando Alexandre and Herv{\'e}
                 Paulino",
  title =        "Execution of compound multi-kernel {OpenCL}
                 computations in {multi-CPU\slash multi-GPU}
                 environments",
  journal =      j-CCPE,
  volume =       "28",
  number =       "3",
  pages =        "768--787",
  day =          "10",
  month =        mar,
  year =         "2016",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3612",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Tue Feb 9 06:13:22 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "28 Aug 2015",
}
@Article{Sorensen:2016:EER,
  author =       "Tyler Sorensen and Alastair F. Donaldson",
  title =        "Exposing errors related to weak memory in {GPU}
                 applications",
  journal =      j-SIGPLAN,
  volume =       "51",
  number =       "6",
  pages =        "100--113",
  month =        jun,
  year =         "2016",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/2980983.2908114",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Mon Sep 5 07:32:25 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "We present the systematic design of a testing
                 environment that uses stressing and fuzzing to reveal
                 errors in GPU applications that arise due to weak
                 memory effects. We evaluate our approach on seven GPUs
                 spanning three Nvidia architectures, across ten CUDA
                 applications that use fine-grained concurrency. Our
                 results show that applications that rarely or never
                 exhibit errors related to weak memory when executed
                 natively can readily exhibit these errors when executed
                 in our testing environment. Our testing environment
                 also provides a means to help identify the root causes
                 of such errors, and automatically suggests how to
                 insert fences that harden an application against weak
                 memory bugs. To understand the cost of GPU fences, we
                 benchmark applications with fences provided by the
                 hardening strategy as well as a more conservative,
                 sound fencing strategy.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PLDI '16 conference proceedings.",
}
@Article{Sorensen:2016:PIW,
  author =       "Tyler Sorensen and Alastair F. Donaldson and Mark
                 Batty and Ganesh Gopalakrishnan and Zvonimir
                 Rakamari{\'c}",
  title =        "Portable inter-workgroup barrier synchronisation for
                 {GPUs}",
  journal =      j-SIGPLAN,
  volume =       "51",
  number =       "10",
  pages =        "39--58",
  month =        oct,
  year =         "2016",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3022671.2984032",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:13 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Despite the growing popularity of GPGPU programming,
                 there is not yet a portable and formally-specified
                 barrier that one can use to synchronise across
                 workgroups. Moreover, the occupancy-bound execution
                 model of GPUs breaks assumptions inherent in
                 traditional software execution barriers, exposing them
                 to deadlock. We present an occupancy discovery protocol
                 that dynamically discovers a safe estimate of the
                 occupancy for a given GPU and kernel, allowing for a
                 starvation-free (and hence, deadlock-free)
                 inter-workgroup barrier by restricting the number of
                 workgroups according to this estimate. We implement
                 this idea by adapting an existing, previously
                 non-portable, GPU inter-workgroup barrier to use OpenCL
                 2.0 atomic operations, and prove that the barrier meets
                 its natural specification in terms of synchronisation.
                 We assess the portability of our approach over eight
                 GPUs spanning four vendors, comparing the performance
                 of our method against alternative methods. Our key
                 findings include: (1)~the recall of our discovery
                 protocol is nearly 100\%; (2)~runtime comparisons
                 vary substantially across GPUs and applications; and
                 (3)~our method provides portable and safe
                 inter-workgroup synchronisation across the applications
                 we study.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "OOPSLA '16 conference proceedings.",
}
@Article{Tampouratzis:2016:AIH,
  author =       "Nikolaos Tampouratzis and Pavlos M. Mattheakis and
                 Ioannis Papaefstathiou",
  title =        "Accelerating Intercommunication in Highly Parallel
                 Systems",
  journal =      j-TACO,
  volume =       "13",
  number =       "4",
  pages =        "40:1--40:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3005717",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Wed Dec 28 16:24:46 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Every HPC system consists of numerous processing nodes
                 interconnect using a number of different inter-process
                 communication protocols such as Messaging Passing
                 Interface (MPI) and Global Arrays (GA). Traditionally,
                 research has focused on optimizing these protocols and
                 identifying the most suitable ones for each system
                 and/or application. Recently, there has been a proposal
                 to unify the primitive operations of the different
                 inter-processor communication protocols through the
                 Portals library. Portals offer a set of low-level
                 communication routines which can be composed in order
                 to implement the functionality of different
                 intercommunication protocols. However, Portals
                 modularity comes at a performance cost, since it adds
                 one more layer in the actual protocol implementation.
                 This work aims at closing the performance gap between a
                 generic and reusable intercommunication layer, such as
                 Portals, and the several monolithic and highly
                 optimized intercommunication protocols. This is
                 achieved through the development of a novel hardware
                 offload engine efficiently implementing the basic
                 Portals' modules. Our innovative system is up to two
                 orders of magnitude faster than the conventional
                 software implementation of Portals' while the speedup
                 achieved over the conventional monolithic software
                 implementations of MPI and GAs is more than an order of
                 magnitude. The power consumption of our hardware system
                 is less than 1/100th of what a low-power CPU consumes
                 when executing the Portal's software while its silicon
                 cost is less than 1/10th of that of a very simple RISC
                 CPU. Moreover, our design process is also innovative
                 since we have first modeled the hardware within an
                 untimed virtual prototype which allowed for rapid
                 design space exploration; then we applied a novel
                 methodology to transform the untimed description into
                 an efficient timed hardware description, which was then
                 transformed into a hardware netlist through a
                 High-Level Synthesis (HLS) tool.",
  acknowledgement = ack-nhfb,
  articleno =    "40",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Tang:2016:AKM,
  author =       "Qing Y. Tang and Mohammed A. S. Khalid",
  title =        "Acceleration of {$k$}-Means Algorithm Using {Altera SDK}
                 for {OpenCL}",
  journal =      j-TRETS,
  volume =       "10",
  number =       "1",
  pages =        "6:1--6:??",
  month =        dec,
  year =         "2016",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/2964910",
  ISSN =         "1936-7406 (print), 1936-7414 (electronic)",
  ISSN-L =       "1936-7406",
  bibdate =      "Mon Apr 3 11:34:09 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/trets.bib",
  abstract =     "A K-means clustering algorithm involves partitioning
                 of data iteratively into k clusters. It is one of the
                 most popular data-mining algorithms [Wu et al. 2007],
                 and is widely used in other applications, such as image
                 processing and machine learning. However, k-means is
                 highly time-consuming when data or cluster size is
                 large. Traditionally, FPGAs have shown great promise
                 for accelerating computationally intensive algorithms,
                 but they are harder to use for acceleration if we rely
                 on traditional HDL-based design methods. The recent
                 introduction of Altera SDK for the OpenCL high-level
                 synthesis tool allows developers to utilize FPGA's
                 potential without long development periods and
                 extensive hardware knowledge. This article presents an
                 optimized implementation of a k-means clustering
                 algorithm on an FPGA using Altera SDK for OpenCL.
                 Performance and power consumption is measured with
                 various data, cluster, and dimension sizes. When
                 compared to state-of-the-art solutions, this
                 implementation supports larger cluster sizes, offers up
                 to 21x speed over a CPU and is more power efficient
                 than a GPU. Unlike previous implementations, it can
                 deliver consistently high throughput across large or
                 small feature dimensions given reasonable cluster sizes
                 and large enough data size.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Reconfigurable Technology and
                 Systems (TRETS)",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J1151",
}
@Article{Vega-Gisbert:2016:DIJ,
  author =       "Oscar Vega-Gisbert and Jose E. Roman and Jeffrey M.
                 Squyres",
  title =        "Design and implementation of {Java} bindings in {Open
                 MPI}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "59",
  number =       "??",
  pages =        "1--20",
  month =        nov,
  year =         "2016",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sat Nov 26 12:06:01 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819116300758",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}
@Article{Wang:2016:LLA,
  author =       "Jin Wang and Norm Rubin and Albert Sidelnik and
                 Sudhakar Yalamanchili",
  title =        "{LaPerm}: locality aware scheduler for dynamic
                 parallelism on {GPUs}",
  journal =      j-COMP-ARCH-NEWS,
  volume =       "44",
  number =       "3",
  pages =        "583--595",
  month =        jun,
  year =         "2016",
  CODEN =        "CANED2",
  DOI =          "https://doi.org/10.1145/3007787.3001199",
  ISSN =         "0163-5964 (print), 1943-5851 (electronic)",
  ISSN-L =       "0163-5964",
  bibdate =      "Thu Jan 12 18:43:43 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigarch.bib",
  abstract =     "Recent developments in GPU execution models and
                 architectures have introduced dynamic parallelism to
                 facilitate the execution of irregular applications
                 where control flow and memory behavior can be
                 unstructured, time-varying, and hierarchical. The
                 changes brought about by this extension to the
                 traditional bulk synchronous parallel (BSP) model also
                 creates new challenges in exploiting the current GPU
                 memory hierarchy. One of the major challenges is that
                 the reference locality that exists between the parent
                 and child thread blocks (TBs) created during dynamic
                 nested kernel and thread block launches cannot be fully
                 leveraged using the current TB scheduling strategies.
                 These strategies were designed for the current
                 implementations of the BSP model but fall short when
                 dynamic parallelism is introduced since they are
                 oblivious to the hierarchical reference locality. We
                 propose LaPerm, a new locality-aware TB scheduler that
                 exploits such parent-child locality, both spatial and
                 temporal. LaPerm adopts three different scheduling
                 decisions to (i) prioritize the execution of the child
                 TBs, (ii) bind them to the stream multiprocessors (SMXs)
                 occupied by their parents TBs, and (iii) maintain
                 workload balance across compute units. Experiments with
                 a set of irregular CUDA applications executed on a
                 cycle-level simulator employing dynamic parallelism
                 demonstrate that LaPerm is able to achieve an average
                 of 27\% performance improvement over the baseline
                 round-robin TB scheduler commonly used in modern
                 GPUs.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGARCH Computer Architecture News",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J89",
  remark =       "ISCA '16 conference proceedings.",
}
@Article{Wang:2016:MMF,
  author =       "Zeke Wang and Shuhao Zhang and Bingsheng He and Wei
                 Zhang",
  title =        "{Melia}: A {MapReduce} Framework on {OpenCL}-Based
                 {FPGAs}",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "27",
  number =       "12",
  pages =        "3547--3560",
  month =        dec,
  year =         "2016",
  CODEN =        "ITDSEO",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed Nov 16 18:43:09 MST 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.computer.org/csdl/trans/td/2016/12/07425227-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Witchel:2016:PPW,
  author =       "Emmett Witchel",
  title =        "Programmer Productivity in a World of Mushy
                 Interfaces: Challenges of the Post-{ISA} Reality",
  journal =      j-OPER-SYS-REV,
  volume =       "50",
  number =       "2",
  pages =        "591--591",
  month =        jun,
  year =         "2016",
  CODEN =        "OSRED8",
  DOI =          "https://doi.org/10.1145/2954680.2876511",
  ISSN =         "0163-5980 (print), 1943-586X (electronic)",
  ISSN-L =       "0163-5980",
  bibdate =      "Thu Jun 9 17:03:34 MDT 2016",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/opersysrev.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Since 1964, we had the notion that the instruction set
                 architecture (ISA) is a useful and fairly opaque
                 abstraction layer between hardware and software.
                 Software rode hardware's performance wave while
                 remaining gloriously oblivious to hardware's growing
                 complexity. Unfortunately, the jig is up. We still have
                 ISAs, but the abstraction no longer offers seamless
                 portability---parallel software needs to be tuned for
                 different core counts, and heterogeneous processing
                 elements (CPUs, GPUs, accelerators) further complicate
                 programmability. We are better at building large-scale
                 heterogeneous processors than we are at programming
                 them. Maintaining software across multiple current
                 platforms is difficult and porting to future platforms
                 is also difficult. There have been many technical
                 responses: virtual ISAs (e.g., NVIDIA's PTX),
                 higher-level programming interfaces (e.g., CUDA or
                 OpenCL), and late-stage compilation and
                 platform-specific tailoring (e.g., Android ART), etc. A
                 team of opinionated experts, drawn from the three
                 ASPLOS communities will examine the problem of
                 programmer productivity in the post-ISA world, first
                 from the perspective of their area of expertise and
                 then noting the contributions from the other two
                 communities. What research will save us and how? This
                 wide-ranging debate will frame important research areas
                 for future work while being grounded in frank
                 discussion about what has succeeded in the past.
                 Attendees can expect actionable insight into important
                 research issues as well an entertaining discussion.",
  acknowledgement = ack-nhfb,
  fjournal =     "Operating Systems Review",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J597",
}
@article{Yang:2016:HTM,
  author = {Fan Yang and Jinfeng Li and James Cheng},
  title = {{Husky}: towards a more efficient and expressive
    distributed computing framework},
  journal = j-PROC-VLDB-ENDOWMENT,
  volume = {9},
  number = {5},
  pages = {420--431},
  month = jan,
  year = {2016},
  coden = {????},
  issn = {2150-8097},
  bibdate = {Mon Jan 11 17:54:24 MST 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract = {Finding efficient, expressive and yet intuitive
    programming models for data-parallel computing system
    is an important and open problem. Systems like Hadoop
    and Spark have been widely adopted for massive data
    processing, as coarse-grained primitives like map and
    reduce are succinct and easy to master. However,
    sometimes over-simplified API hinders programmers from
    more fine-grained control and designing more efficient
    algorithms. Developers may have to resort to
    sophisticated domain-specific languages (DSLs), or even
    low-level layers like MPI, but this raises development
    cost---learning many mutually exclusive systems
    prolongs the development schedule, and the use of
    low-level tools may result in bug-prone programming.
    This motivated us to start the Husky open-source
    project, which is an attempt to strike a better balance
    between high performance and low development cost.
    Husky is developed mainly for in-memory large scale
    data mining, and also serves as a general research
    platform for designing efficient distributed
    algorithms. We show that many existing frameworks can
    be easily implemented and bridged together inside
    Husky, and Husky is able to achieve similar or even
    better performance compared with domain-specific
    systems.},
  acknowledgement = ack-nhfb,
  fjournal = {Proceedings of the VLDB Endowment},
  journal-url = {http://portal.acm.org/citation.cfm?id=J1174},
}
@article{Young-S:2016:OFP,
  author = {Luis E. Young-S. and Dusan Vudragovi{\'c} and Paulsamy
    Muruganandam and Sadhan K. Adhikari and Antun Balaz},
  title = {{OpenMP Fortran} and {C} programs for solving the
    time-dependent {Gross--Pitaevskii} equation in an
    anisotropic trap},
  journal = j-COMP-PHYS-COMM,
  volume = {204},
  number = {??},
  pages = {209--213},
  month = jul,
  year = {2016},
  coden = {CPHCBZ},
  issn = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l = {0010-4655},
  bibdate = {Fri May 13 19:25:21 MDT 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/fortran2.bib;
    http://www.math.utah.edu/pub/tex/bib/gnu.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S001046551630073X},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-url = {http://www.sciencedirect.com/science/journal/00104655/},
}
@article{Zaza:2016:CBP,
  author = {Ayham Zaza and Abeeb A. Awotunde and Faisal A. Fairag
    and Mayez A. Al-Mouhamed},
  title = {A {CUDA} based parallel multi-phase oil reservoir
    simulator},
  journal = j-COMP-PHYS-COMM,
  volume = {206},
  number = {??},
  pages = {2--16},
  month = sep,
  year = {2016},
  coden = {CPHCBZ},
  issn = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l = {0010-4655},
  bibdate = {Fri Jun 10 18:27:25 MDT 2016},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0010465516300996},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-url = {http://www.sciencedirect.com/science/journal/00104655/},
}
@Article{Agullo:2017:BGB,
author = "Emmanuel Agullo and Olivier Aumage and Berenger Bramas
and Olivier Coulaud and Samuel Pitoiset",
title = "Bridging the Gap Between {OpenMP} and Task-Based
Runtime Systems for the {Fast Multipole Method}",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "28",
number = "10",
pages = "2794--2807",
month = oct,
year = "2017",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2017.2697857",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Thu Oct 12 06:58:12 MDT 2017",
bibsource = "http://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib;
http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.computer.org/csdl/trans/td/2017/10/07912335-abs.html",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@article{Al-Refaie:2017:PAH,
  author = {Ahmed F. Al-Refaie and Jonathan Tennyson},
  title = {A parallel algorithm for {Hamiltonian} matrix
    construction in electron-molecule collision
    calculations: {MPI--SCATCI}},
  journal = j-COMP-PHYS-COMM,
  volume = {221},
  number = {??},
  pages = {53--62},
  month = dec,
  year = {2017},
  coden = {CPHCBZ},
  issn = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l = {0010-4655},
  bibdate = {Mon Oct 16 14:20:16 MDT 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0010465517302436},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-url = {http://www.sciencedirect.com/science/journal/00104655},
}
@article{Al-Refaie:2017:PCT,
  author = {Ahmed F. Al-Refaie and Sergei N. Yurchenko and
    Jonathan Tennyson},
  title = {{{\bf G}PU {\bf A}ccelerated {\bf IN}tensities MPI
    (GAIN-MPI)}: a new method of computing {Einstein-$A$}
    coefficients},
  journal = j-COMP-PHYS-COMM,
  volume = {214},
  number = {??},
  pages = {216--224},
  month = may,
  year = {2017},
  coden = {CPHCBZ},
  doi = {https://doi.org/10.1016/j.cpc.2017.01.013},
  issn = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l = {0010-4655},
  bibdate = {Fri Mar 3 06:05:58 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0010465517300255},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-url = {http://www.sciencedirect.com/science/journal/00104655},
}
@article{Aliaga:2017:CTP,
  author = {Jos{\'e} I. Aliaga and Mar{\'\i}a Barreda and Goran
    Flegar and Matthias Bollh{\"o}fer and Enrique S.
    Quintana-Ort{\'\i}},
  title = {Communication in task-parallel {ILU}-preconditioned
    {CG} solvers using {MPI + OmpSs}},
  journal = j-CCPE,
  volume = {29},
  number = {21},
  pages = {??--??},
  day = {10},
  month = nov,
  year = {2017},
  coden = {CCPEBO},
  doi = {https://doi.org/10.1002/cpe.4280},
  issn = {1532-0626 (print), 1532-0634 (electronic)},
  issn-l = {1532-0626},
  bibdate = {Sat Dec 30 09:11:58 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Practice and Experience},
  journal-url = {http://www.interscience.wiley.com/jpages/1532-0626},
}
@Article{Alvanos:2017:PMM,
author = "Michail Alvanos and Theodoros Christoudias",
title = "{\tt MEDINA}: {MECCA} Development in Accelerators ---
{KPP Fortran} to {CUDA} source-to-source
Pre-processor",
journal = j-J-OPEN-RES-SOFT,
volume = "5",
number = "1",
pages = "13--??",
day = "28",
month = apr,
year = "2017",
CODEN = "????",
DOI = "https://doi.org/10.5334/jors.158",
ISSN = "2049-9647",
ISSN-L = "2049-9647",
bibdate = "Sat Sep 8 10:03:50 MDT 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
http://www.math.utah.edu/pub/tex/bib/jors.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://openresearchsoftware.metajnl.com/articles/10.5334/jors.158/",
acknowledgement = ack-nhfb,
fjournal = "Journal of Open Research Software",
journal-URL = "https://openresearchsoftware.metajnl.com/issue/archive/",
}
@article{Anderson:2017:BGB,
  author = {Michael Anderson and Shaden Smith and Narayanan
    Sundaram and Mihai Capota and Zheguang Zhao and
    Subramanya Dulloor and Nadathur Satish and Theodore L.
    Willke},
  title = {Bridging the gap between {HPC} and big data
    frameworks},
  journal = j-PROC-VLDB-ENDOWMENT,
  volume = {10},
  number = {8},
  pages = {901--912},
  month = apr,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.14778/3090163.3090168},
  issn = {2150-8097},
  bibdate = {Fri Jun 23 17:12:46 MDT 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract = {Apache Spark is a popular framework for data analytics
    with attractive features such as fault tolerance and
    interoperability with the Hadoop ecosystem.
    Unfortunately, many analytics operations in Spark are
    an order of magnitude or more slower compared to native
    implementations written with high performance computing
    tools such as MPI. There is a need to bridge the
    performance gap while retaining the benefits of the
    Spark ecosystem such as availability, productivity, and
    fault tolerance. In this paper, we propose a system for
    integrating MPI with Spark and analyze the costs and
    benefits of doing so for four distributed graph and
    machine learning applications. We show that offloading
    computation to an MPI environment from within Spark
    provides 3.1--17.7$ \times $ speedups on the four
    sparse applications, including all of the overheads.
    This opens up an avenue to reuse existing MPI libraries
    in Spark with little effort.},
  acknowledgement = ack-nhfb,
  fjournal = {Proceedings of the VLDB Endowment},
  journal-url = {http://portal.acm.org/citation.cfm?id=J1174},
}
@article{Arteaga:2017:GFG,
  author = {Jaime Arteaga and St{\'e}phane Zuckerman and Guang R.
    Gao},
  title = {Generating Fine-Grain Multithreaded Applications Using
    a Multigrain Approach},
  journal = j-TACO,
  volume = {14},
  number = {4},
  pages = {47:1--47:??},
  month = dec,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/3155288},
  issn = {1544-3566 (print), 1544-3973 (electronic)},
  issn-l = {1544-3566},
  bibdate = {Fri Dec 22 18:25:55 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/taco.bib},
  abstract = {The recent evolution in hardware landscape, aimed at
    producing high-performance computing systems capable of
    reaching extreme-scale performance, has reignited the
    interest in fine-grain multithreading, particularly at
    the intranode level. Indeed, popular parallel
    programming environments, such as OpenMP, which
    features a simple interface for the parallelization of
    programs, are now incorporating fine-grain constructs.
    However, since coarse-grain directives are still
    heavily used, the OpenMP runtime is forced to support
    both coarse- and fine-grain models of execution,
    potentially reducing the advantages obtained when
    executing an application in a fully fine-grain
    environment. To evaluate the type of applications that
    benefit from executing in a unified fine-grain program
    execution model, this article presents a multigrain
    parallel programming environment for the generation of
    fine-grain multithreaded applications from programs
    featuring OpenMP's API, allowing OpenMP programs to be
    run on top of a fine-grain event-driven program
    execution model. Experimental results with five
    scientific benchmarks show that fine-grain
    applications, generated by and run on our environment
    with two runtimes implementing a fine-grain
    event-driven program execution model, are competitive
    and can outperform their OpenMP counterparts,
    especially for data-intensive workloads with irregular
    and dynamic parallelism, reaching speedups as high as
    2.6$ \times $ for Graph500 and 51$ \times $ for NAS
    Data Cube.},
  acknowledgement = ack-nhfb,
  articleno = {47},
  fjournal = {ACM Transactions on Architecture and Code Optimization
    (TACO)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J924},
}
@article{Awan:2017:CCD,
  author = {Ammar Ahmad Awan and Khaled Hamidouche and Jahanzeb
    Maqbool Hashmi and Dhabaleswar K. Panda},
  title = {{S-Caffe}: Co-designing {MPI} Runtimes and {Caffe} for
    Scalable Deep Learning on Modern {GPU} Clusters},
  journal = j-SIGPLAN,
  volume = {52},
  number = {8},
  pages = {193--205},
  month = aug,
  year = {2017},
  coden = {SINODQ},
  doi = {https://doi.org/10.1145/3155284.3018769},
  issn = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  issn-l = {0362-1340},
  bibdate = {Fri Dec 1 18:56:12 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract = {Availability of large data sets like ImageNet and
    massively parallel computation support in modern HPC
    devices like NVIDIA GPUs have fueled a renewed interest
    in Deep Learning (DL) algorithms. This has triggered
    the development of DL frameworks like Caffe, Torch,
    TensorFlow, and CNTK. However, most DL frameworks have
    been limited to a single node. In order to scale out DL
    frameworks and bring HPC capabilities to the DL arena,
    we propose, S-Caffe; a scalable and distributed Caffe
    adaptation for modern multi-GPU clusters. With an
    in-depth analysis of new requirements brought forward
    by the DL frameworks and limitations of current
    communication runtimes, we present a co-design of the
    Caffe framework and the MVAPICH2-GDR MPI runtime. Using
    the co-design methodology, we modify Caffe's workflow
    to maximize the overlap of computation and
    communication with multi-stage data propagation and
    gradient aggregation schemes. We bring DL-Awareness to
    the MPI runtime by proposing a hierarchical reduction
    design that benefits from CUDA-Aware features and
    provides up to a massive 133x speedup over OpenMPI and
    2.6x speedup over MVAPICH2 for 160 GPUs. S-Caffe
    successfully scales up to 160 K-80 GPUs for GoogLeNet
    (ImageNet) with a speedup of 2.5x over 32 GPUs. To the
    best of our knowledge, this is the first framework that
    scales up to 160 GPUs. Furthermore, even for single
    node training, S-Caffe shows an improvement of 14\% and
    9\% over Nvidia's optimized Caffe for 8 and 16 GPUs,
    respectively. In addition, S-Caffe achieves up to 1395
    samples per second for the AlexNet model, which is
    comparable to the performance of Microsoft CNTK.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGPLAN Notices},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark = {PPoPP '17 conference proceedings.},
}
@article{Bae:2017:SEF,
  author = {Seung-Hee Bae and Daniel Halperin and Jevin D. West
    and Martin Rosvall and Bill Howe},
  title = {Scalable and Efficient Flow-Based Community Detection
    for Large-Scale Graph Analysis},
  journal = j-TKDD,
  volume = {11},
  number = {3},
  pages = {32:1--32:??},
  month = apr,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/2992785},
  issn = {1556-4681 (print), 1556-472X (electronic)},
  issn-l = {1556-4681},
  bibdate = {Mon Jul 24 17:32:52 MDT 2017},
  bibsource = {http://www.acm.org/pubs/contents/journals/tkdd/;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/tkdd.bib},
  abstract = {Community detection is an increasingly popular
    approach to uncover important structures in large
    networks. Flow-based community detection methods rely
    on communication patterns of the network rather than
    structural properties to determine communities. The
    Infomap algorithm in particular optimizes a novel
    objective function called the map equation and has been
    shown to outperform other approaches in third-party
    benchmarks. However, Infomap and its variants are
    inherently sequential, limiting their use for
    large-scale graphs. In this article, we propose a novel
    algorithm to optimize the map equation called RelaxMap.
    RelaxMap provides two important improvements over
    Infomap: parallelization, so that the map equation can
    be optimized over much larger graphs, and
    prioritization, so that the most important work occurs
    first, iterations take less time, and the algorithm
    converges faster. We implement these techniques using
    OpenMP on shared-memory multicore systems, and evaluate
    our approach on a variety of graphs from standard graph
    clustering benchmarks as well as real graph datasets.
    Our evaluation shows that both techniques are
    effective: RelaxMap achieves 70\% parallel efficiency
    on eight cores, and prioritization improves algorithm
    performance by an additional 20--50\% on average,
    depending on the graph properties. Additionally,
    RelaxMap converges in the similar number of iterations
    and provides solutions of equivalent quality as the
    serial Infomap implementation.},
  acknowledgement = ack-nhfb,
  articleno = {32},
  fjournal = {ACM Transactions on Knowledge Discovery from Data
    (TKDD)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J1054},
}
@article{Barthels:2017:DJA,
  author = {Claude Barthels and Ingo M{\"u}ller and Timo Schneider
    and Gustavo Alonso and Torsten Hoefler},
  title = {Distributed join algorithms on thousands of cores},
  journal = j-PROC-VLDB-ENDOWMENT,
  volume = {10},
  number = {5},
  pages = {517--528},
  month = jan,
  year = {2017},
  coden = {????},
  issn = {2150-8097},
  bibdate = {Sat Feb 25 09:01:51 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/vldbe.bib},
  abstract = {Traditional database operators such as joins are
    relevant not only in the context of database engines
    but also as a building block in many computational and
    machine learning algorithms. With the advent of big
    data, there is an increasing demand for efficient join
    algorithms that can scale with the input data size and
    the available hardware resources. In this paper, we
    explore the implementation of distributed join
    algorithms in systems with several thousand cores
    connected by a low-latency network as used in high
    performance computing systems or data centers. We
    compare radix hash join to sort-merge join algorithms
    and discuss their implementation at this scale. In the
    paper, we explain how to use MPI to implement joins,
    show the impact and advantages of RDMA, discuss the
    importance of network scheduling, and study the
    relative performance of sorting vs. hashing. The
    experimental results show that the algorithms we
    present scale well with the number of cores, reaching a
    throughput of 48.7 billion input tuples per second on
    4,096 cores.},
  acknowledgement = ack-nhfb,
  fjournal = {Proceedings of the VLDB Endowment},
  journal-url = {http://portal.acm.org/citation.cfm?id=J1174},
}
@article{Bonelli:2017:MCA,
  author = {Francesco Bonelli and Michele Tuttafesta and Gianpiero
    Colonna and Luigi Cutrone and Giuseppe Pascazio},
  title = {An {MPI--CUDA} approach for hypersonic flows with
    detailed state-to-state air kinetics using a {GPU}
    cluster},
  journal = j-COMP-PHYS-COMM,
  volume = {219},
  number = {??},
  pages = {178--195},
  month = oct,
  year = {2017},
  coden = {CPHCBZ},
  issn = {0010-4655 (print), 1879-2944 (electronic)},
  issn-l = {0010-4655},
  bibdate = {Wed Jul 26 06:22:13 MDT 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0010465517301613},
  acknowledgement = ack-nhfb,
  fjournal = {Computer Physics Communications},
  journal-url = {http://www.sciencedirect.com/science/journal/00104655},
}
@article{Bruel:2017:ACC,
  author = {Pedro Bruel and Marcos Amar{\'\i}s and Alfredo
    Goldman},
  title = {Autotuning {CUDA} compiler parameters for
    heterogeneous applications using the {OpenTuner}
    framework},
  journal = j-CCPE,
  volume = {29},
  number = {22},
  pages = {??--??},
  day = {25},
  month = nov,
  year = {2017},
  coden = {CCPEBO},
  doi = {https://doi.org/10.1002/cpe.3973},
  issn = {1532-0626 (print), 1532-0634 (electronic)},
  issn-l = {1532-0626},
  bibdate = {Sat Dec 30 09:11:59 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Concurrency and Computation: Practice and Experience},
  journal-url = {http://www.interscience.wiley.com/jpages/1532-0626},
}
@article{Carpen-Amarie:2017:EOC,
  author = {Alexandra Carpen-Amarie and Sascha Hunold and Jesper
    Larsson Tr{\"a}ff},
  title = {On expected and observed communication performance
    with {MPI} derived datatypes},
  journal = j-PARALLEL-COMPUTING,
  volume = {69},
  number = {??},
  pages = {98--117},
  month = nov,
  year = {2017},
  coden = {PACOEJ},
  issn = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l = {0167-8191},
  bibdate = {Tue Oct 24 15:15:02 MDT 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0167819117301217},
  acknowledgement = ack-nhfb,
  fjournal = {Parallel Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/01678191},
}
@article{Celik:2017:BET,
  author = {Ahmet Celik and Sreepathi Pai and Sarfraz Khurshid and
    Milos Gligoric},
  title = {Bounded exhaustive test-input generation on {GPUs}},
  journal = j-PACMPL,
  volume = {1},
  number = {OOPSLA},
  pages = {94:1--94:??},
  month = oct,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/3133918},
  issn = {2475-1421},
  bibdate = {Wed Jan 10 09:45:26 MST 2018},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pacmpl.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  articleno = {94},
  fjournal = {Proceedings of the ACM on Programming Languages},
  journal-url = {https://pacmpl.acm.org/},
}
@article{Chabbi:2017:EAL,
  author = {Milind Chabbi and Abdelhalim Amer and Shasha Wen and
    Xu Liu},
  title = {An Efficient Abortable-locking Protocol for
    Multi-level {NUMA} Systems},
  journal = j-SIGPLAN,
  volume = {52},
  number = {8},
  pages = {61--74},
  month = aug,
  year = {2017},
  coden = {SINODQ},
  doi = {https://doi.org/10.1145/3155284.3018768},
  issn = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  issn-l = {0362-1340},
  bibdate = {Fri Dec 1 18:56:12 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract = {The popularity of Non-Uniform Memory Access (NUMA)
    architectures has led to numerous locality-preserving
    hierarchical lock designs, such as HCLH, HMCS, and
    cohort locks. Locality-preserving locks trade fairness
    for higher throughput. Hence, some instances of
    acquisitions can incur long latencies, which may be
    intolerable for certain applications. Few locks admit a
    waiting thread to abandon its protocol on a timeout.
    State-of-the-art abortable locks are not fully locality
    aware, introduce high overheads, and unsuitable for
    frequent aborts. Enhancing locality-aware locks with
    lightweight timeout capability is critical for their
    adoption. In this paper, we design and evaluate the
    HMCS-T lock, a Hierarchical MCS (HMCS) lock variant
    that admits a timeout. HMCS-T maintains the locality
    benefits of HMCS while ensuring aborts to be
    lightweight. HMCS-T offers the progress guarantee
    missing in most abortable queuing locks. Our
    evaluations show that HMCS-T offers the timeout feature
    at a moderate overhead over its HMCS analog. HMCS-T,
    used in an MPI runtime lock, mitigated the poor
    scalability of an MPI+OpenMP BFS code and resulted in
    4.3x superior scaling.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGPLAN Notices},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark = {PPoPP '17 conference proceedings.},
}
@Article{Chen:2017:AAG,
author = "Jian Chen and Russell M. Clapp",
title = "{Astro}: Auto-Generation of Synthetic Traces Using
Scaling Pattern Recognition for {MPI} Workloads",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "28",
number = "8",
pages = "2159--2171",
month = aug,
year = "2017",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2017.2649518",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Tue Jul 25 18:46:21 MDT 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.computer.org/csdl/trans/td/2017/08/07809142-abs.html",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@article{Cornelis:2017:HAV,
  author = {Jan G. Cornelis and Jan Lemeire and Tim Bruylants and
    Peter Schelkens},
  title = {Heterogeneous acceleration of volumetric {JPEG 2000}
    using {OpenCL}},
  journal = j-IJHPCA,
  volume = {31},
  number = {3},
  pages = {229--245},
  year = {2017},
  coden = {IHPCFL},
  doi = {https://doi.org/10.1177/1094342016646438},
  issn = {1094-3420 (print), 1741-2846 (electronic)},
  issn-l = {1094-3420},
  bibdate = {Tue Nov 6 06:13:05 MST 2018},
  bibsource = {http://hpc.sagepub.com/;
    http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://journals.sagepub.com/doi/full/10.1177/1094342016646438},
  acknowledgement = ack-nhfb,
  fjournal = {International Journal of High Performance Computing
    Applications},
  journal-url = {http://hpc.sagepub.com/content/by/year},
  xxmonth = may,
}
@article{Dang:2017:ECB,
  author = {Hoang-Vu Dang and Marc Snir and William Gropp},
  title = {Eliminating contention bottlenecks in multithreaded
    {MPI}},
  journal = j-PARALLEL-COMPUTING,
  volume = {69},
  number = {??},
  pages = {1--23},
  month = nov,
  year = {2017},
  coden = {PACOEJ},
  issn = {0167-8191 (print), 1872-7336 (electronic)},
  issn-l = {0167-8191},
  bibdate = {Tue Oct 24 15:15:02 MDT 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0167819117301187},
  acknowledgement = ack-nhfb,
  fjournal = {Parallel Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/01678191},
}
@article{Dashti:2017:AMM,
  author = {Mohammad Dashti and Alexandra Fedorova},
  title = {Analyzing memory management methods on integrated
    {CPU--GPU} systems},
  journal = j-SIGPLAN,
  volume = {52},
  number = {9},
  pages = {59--69},
  month = sep,
  year = {2017},
  coden = {SINODQ},
  doi = {https://doi.org/10.1145/3156685.3092256},
  issn = {0362-1340 (print), 1523-2867 (print), 1558-1160
    (electronic)},
  issn-l = {0362-1340},
  bibdate = {Fri Dec 1 18:56:13 MST 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract = {Heterogeneous systems that integrate a multicore CPU
    and a GPU on the same die are ubiquitous. On these
    systems, both the CPU and GPU share the same physical
    memory as opposed to using separate memory dies.
    Although integration eliminates the need to copy data
    between the CPU and the GPU, arranging transparent
    memory sharing between the two devices can carry large
    overheads. Memory on CPU/GPU systems is typically
    managed by a software framework such as OpenCL or CUDA,
    which includes a runtime library, and communicates with
    a GPU driver. These frameworks offer a range of memory
    management methods that vary in ease of use,
    consistency guarantees and performance. In this study,
    we analyze some of the common memory management methods
    of the most widely used software frameworks for
    heterogeneous systems: CUDA, OpenCL 1.2, OpenCL 2.0,
    and HSA, on NVIDIA and AMD hardware. We focus on
    performance/functionality trade-offs, with the goal of
    exposing their performance impact and simplifying the
    choice of memory management methods for programmers.},
  acknowledgement = ack-nhfb,
  fjournal = {ACM SIGPLAN Notices},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark = {ISMM '17 conference proceedings.},
}
@article{deAndrade:2017:OFH,
  author = {Douglas Coimbra de Andrade and Lu{\'\i}s Gonzaga
    Trabasso},
  title = {An {OpenCL} framework for high performance extraction
    of image features},
  journal = j-J-PAR-DIST-COMP,
  volume = {109},
  number = {??},
  pages = {75--88},
  month = nov,
  year = {2017},
  coden = {JPDCER},
  issn = {0743-7315 (print), 1096-0848 (electronic)},
  issn-l = {0743-7315},
  bibdate = {Sat Aug 19 13:10:32 MDT 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url = {http://www.sciencedirect.com/science/article/pii/S0743731517301624},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-url = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Degomme:2017:SMA,
author = "Augustin Degomme and Arnaud Legrand and George S.
Markomanolis and Martin Quinson and Mark Stillwell and
Frederic Suter",
title = "Simulating {MPI} Applications: The {SMPI} Approach",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "28",
number = "8",
pages = "2387--2400",
month = aug,
year = "2017",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2017.2669305",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Tue Jul 25 18:46:21 MDT 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.computer.org/csdl/trans/td/2017/08/07855780-abs.html",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed Systems",
journal-URL = "http://www.computer.org/tpds/archives.htm",
}
@article{Diavastos:2017:SLR,
  author = {Andreas Diavastos and Pedro Trancoso},
  title = {{SWITCHES}: a Lightweight Runtime for Dataflow
    Execution of Tasks on Many-Cores},
  journal = j-TACO,
  volume = {14},
  number = {3},
  pages = {31:1--31:??},
  month = sep,
  year = {2017},
  coden = {????},
  doi = {https://doi.org/10.1145/3127068},
  issn = {1544-3566 (print), 1544-3973 (electronic)},
  issn-l = {1544-3566},
  bibdate = {Wed Sep 6 17:12:05 MDT 2017},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/taco.bib},
  abstract = {SWITCHES is a task-based dataflow runtime that
    implements a lightweight distributed triggering system
    for runtime dependence resolution and uses static
    scheduling and compile-time assignment policies to
    reduce runtime overheads. Unlike other systems, the
    granularity of loop-tasks can be increased to favor
    data-locality, even when having dependences across
    different loops. SWITCHES introduces explicit task
    resource allocation mechanisms for efficient allocation
    of resources and adopts the latest OpenMP Application
    Programming Interface (API), as to maintain high levels
    of programming productivity. It provides a
    source-to-source tool that automatically produces
    thread-based code. Performance on an Intel Xeon-Phi
    shows good scalability and surpasses OpenMP by an
    average of 32\%.},
  acknowledgement = ack-nhfb,
  articleno = {31},
  fjournal = {ACM Transactions on Architecture and Code Optimization
    (TACO)},
  journal-url = {http://portal.acm.org/browse_dl.cfm?idx=J924},
}
@Article{Dietrich:2017:CBA,
author = "Robert Dietrich and Felix Schmitt and Alexander Grund
and Jonas Stolle",
title = "Critical-blame analysis for {OpenMP 4.0} offloading on
{Intel Xeon Phi}",
journal = j-J-SYST-SOFTW,
volume = "125",
number = "??",
pages = "381--388",
month = mar,
year = "2017",
CODEN = "JSSODM",
ISSN = "0164-1212 (print), 1873-1228 (electronic)",
ISSN-L = "0164-1212",
bibdate = "Sat Feb 4 12:20:39 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jsystsoftw.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0164121215002940",
acknowledgement = ack-nhfb,
fjournal = "Journal of Systems and Software",
journal-URL = "http://www.sciencedirect.com/science/journal/01641212/",
}
@Article{Eizenberg:2017:BBL,
  author =       "Ariel Eizenberg and Yuanfeng Peng and Toma Pigli and
                 William Mansky and Joseph Devietti",
  title =        "{BARRACUDA}: binary-level analysis of runtime {RAces}
                 in {CUDA} programs",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "6",
  pages =        "126--140",
  month =        jun,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3140587.3062342",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:17 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "GPU programming models enable and encourage massively
                 parallel programming with over a million threads,
                 requiring extreme parallelism to achieve good
                 performance. Massive parallelism brings significant
                 correctness challenges by increasing the possibility
                 for bugs as the number of thread interleavings
                 balloons. Conventional dynamic safety analyses struggle
                 to run at this scale. We present BARRACUDA, a
                 concurrency bug detector for GPU programs written in
                 Nvidia's CUDA language. BARRACUDA handles a wider range
                 of parallelism constructs than previous work, including
                 branch operations, low-level atomics and memory fences,
                 which allows BARRACUDA to detect new classes of
                 concurrency bugs. BARRACUDA operates at the binary
                 level for increased compatibility with existing code,
                 leveraging a new binary instrumentation framework that
                 is extensible to other dynamic analyses. BARRACUDA
                 incorporates a number of novel optimizations that are
                 crucial for scaling concurrency bug detection to over a
                 million threads.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PLDI '17 conference proceedings.",
}
@Article{Fachada:2017:CCF,
  author =       "Nuno Fachada and Vitor V. Lopes and Rui C. Martins and
                 Agostinho C. Rosa",
  title =        "{\tt cf4ocl}: a {C} framework for {OpenCL}",
  journal =      j-SCI-COMPUT-PROGRAM,
  volume =       "143",
  number =       "??",
  pages =        "9--19",
  day =          "1",
  month =        sep,
  year =         "2017",
  CODEN =        "SCPGD4",
  ISSN =         "0167-6423 (print), 1872-7964 (electronic)",
  ISSN-L =       "0167-6423",
  bibdate =      "Wed Jul 26 05:56:44 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/scicomputprogram.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167642317300540",
  acknowledgement = ack-nhfb,
  fjournal =     "Science of Computer Programming",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01676423",
}
@Article{Falch:2017:MLB,
  author =       "Thomas L. Falch and Anne C. Elster",
  title =        "Machine learning-based auto-tuning for enhanced
                 performance portability of {OpenCL} applications",
  journal =      j-CCPE,
  volume =       "29",
  number =       "8",
  pages =        "??--??",
  day =          "25",
  month =        apr,
  year =         "2017",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4029",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Fri Mar 31 19:12:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Falch:2017:RAM,
author = "Thomas L. Falch and Anne C. Elster",
title = "Machine learning-based auto-tuning for enhanced
performance portability of {OpenCL} applications",
journal = j-CCPE,
volume = "29",
number = "8",
pages = "??--??",
day = "25",
month = apr,
year = "2017",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.4029",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Fri Mar 31 19:12:52 MDT 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
remark = "Apparent duplicate of entry Falch:2017:MLB (identical
title, volume, number, and DOI); the RAM suffix of this
key does not match the title. Key retained so that
existing citations still resolve; verify against the
publisher record and merge.",
}
@Article{Fan:2017:SEE,
author = "Xing Fan and Mostafa Mehrabi and Oliver Sinnen and
Nasser Giacaman",
title = "Supporting Enhanced Exception Handling with {OpenMP}
in Object-Oriented Languages",
journal = j-INT-J-PARALLEL-PROG,
volume = "45",
number = "6",
pages = "1366--1389",
month = dec,
year = "2017",
CODEN = "IJPPE5",
DOI = "https://doi.org/10.1007/s10766-016-0474-x",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Sat Nov 18 09:27:28 MST 2017",
bibsource = "http://link.springer.com/journal/10766/45/6;
http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@Article{Forejt:2017:PPA,
author = "Vojt{\v{e}}ch Forejt and Saurabh Joshi and Daniel
Kroening and Ganesh Narayanaswamy and Subodh Sharma",
title = "Precise Predictive Analysis for Discovering
Communication Deadlocks in {MPI} Programs",
journal = j-TOPLAS,
volume = "39",
number = "4",
pages = "15:1--15:??",
month = sep,
year = "2017",
CODEN = "ATPSDT",
DOI = "https://doi.org/10.1145/3095075",
ISSN = "0164-0925 (print), 1558-4593 (electronic)",
ISSN-L = "0164-0925",
bibdate = "Tue Sep 19 06:38:32 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/toplas/;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/toplas.bib",
abstract = "The Message Passing Interface (MPI) is the standard
API for parallelization in high-performance and
scientific computing. Communication deadlocks are a
frequent problem in MPI programs, and this article
addresses the problem of discovering such deadlocks. We
begin by showing that if an MPI program is single path,
the problem of discovering communication deadlocks is
NP-complete. We then present a novel propositional
encoding scheme that captures the existence of
communication deadlocks. The encoding is based on
modeling executions with partial orders and implemented
in a tool called MOPPER. The tool executes an MPI
program, collects the trace, builds a formula from the
trace using the propositional encoding scheme, and
checks its satisfiability. Finally, we present
experimental results that quantify the benefit of the
approach in comparison to other analyzers and
demonstrate that it offers a scalable solution for
single-path programs.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Programming Languages and
Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J783",
}
@Article{Frust:2017:RDP,
  author =       "Tobias Frust and Michael Wagner and Jan Stephan and
                 Guido Juckeland and Andr{\'e} Bieberle",
  title =        "Rapid data processing for ultrafast {X}-ray computed
                 tomography using scalable and modular {CUDA} based
                 pipelines",
  journal =      j-COMP-PHYS-COMM,
  volume =       "219",
  number =       "??",
  pages =        "353--360",
  month =        oct,
  year =         "2017",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Wed Jul 26 06:22:13 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465517301674",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Fumero:2017:JTG,
  author =       "Juan Fumero and Michel Steuwer and Lukas Stadler and
                 Christophe Dubach",
  title =        "Just-In-Time {GPU} Compilation for Interpreted
                 Languages with Partial Evaluation",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "7",
  pages =        "60--73",
  month =        jul,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3140607.3050761",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:17 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Computer systems are increasingly featuring powerful
                 parallel devices with the advent of many-core CPUs and
                 GPUs. This offers the opportunity to solve
                 computationally-intensive problems at a fraction of the
                 time traditional CPUs need. However, exploiting
                 heterogeneous hardware requires the use of low-level
                 programming language approaches such as OpenCL, which
                 is incredibly challenging, even for advanced
                 programmers. On the application side, interpreted
                 dynamic languages are increasingly becoming popular in
                 many domains due to their simplicity, expressiveness
                 and flexibility. However, this creates a wide gap
                 between the high-level abstractions offered to
                 programmers and the low-level hardware-specific
                 interface. Currently, programmers must rely on high
                 performance libraries or they are forced to write parts
                 of their application in a low-level language like
                 OpenCL. Ideally, nonexpert programmers should be able
                 to exploit heterogeneous hardware directly from their
                 interpreted dynamic languages. In this paper, we
                 present a technique to transparently and automatically
                 offload computations from interpreted dynamic languages
                 to heterogeneous devices. Using just-in-time
                 compilation, we automatically generate OpenCL code at
                 runtime which is specialized to the actual observed
                 data types using profiling information. We demonstrate
                 our technique using R, which is a popular interpreted
                 dynamic language predominately used in big data
                 analytic. Our experimental results show the execution
                 on a GPU yields speedups of over 150x compared to the
                 sequential FastR implementation and the obtained
                 performance is competitive with manually written GPU
                 code. We also show that when taking into account
                 start-up time, large speedups are achievable, even when
                 the applications run for as little as a few seconds.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "VEE '17 conference proceedings.",
}
@Article{Germanas:2017:HUP,
  author =       "D. Germanas and A. Stepsys and S. Mickevicius and R.
                 K. Kalinauskas",
  title =        "{HOTB} update: Parallel code for calculation of three-
                 and four-particle harmonic oscillator transformation
                 brackets and their matrices using {OpenMP}",
  journal =      j-COMP-PHYS-COMM,
  volume =       "215",
  number =       "??",
  pages =        "259--264",
  month =        jun,
  year =         "2017",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Fri Mar 31 15:52:48 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465517300401",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Ghose:2017:FOT,
  author =       "Anirban Ghose and Lokesh Dokara and Soumyajit Dey and
                 Pabitra Mitra",
  title =        "A Framework for {OpenCL} Task Scheduling on
                 Heterogeneous Multicores",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "27",
  number =       "3--4",
  pages =        "1750008",
  year =         "2017",
  CODEN =        "PPLTEE",
  DOI =          "https://doi.org/10.1142/S0129626417500086",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L =       "0129-6264",
  bibdate =      "Tue May 29 09:05:31 MDT 2018",
  bibsource =    "http://ejournals.wspc.com.sg/ppl/;
                 http://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
}
@Article{Gonzalez-Alvarez:2017:HMO,
author = "David L. Gonz{\'a}lez-{\'A}lvarez and Miguel A.
Vega-Rodr{\'\i}guez and {\'A}lvaro Rubio-Largo",
title = "A hybrid {MPI\slash OpenMP} parallel implementation of
{NSGA-II} for finding patterns in protein sequences",
journal = j-J-SUPERCOMPUTING,
volume = "73",
number = "6",
pages = "2285--2312",
month = jun,
year = "2017",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-016-1916-3",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Sat Jun 24 10:31:33 MDT 2017",
bibsource = "http://link.springer.com/journal/11227/73/6;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Grosset:2017:TTT,
author = "A. V. Pascal Grosset and Manasa Prasad and Cameron
Christensen and Aaron Knoll and Charles Hansen",
title = "{TOD}-Tree: Task-Overlapped Direct Send Tree Image
Compositing for Hybrid {MPI} Parallelism and {GPUs}",
journal = j-IEEE-TRANS-VIS-COMPUT-GRAPH,
volume = "23",
number = "6",
pages = "1677--1690",
month = jun,
year = "2017",
CODEN = "ITVGEA",
DOI = "https://doi.org/10.1109/TVCG.2016.2542069",
ISSN = "1077-2626 (print), 1941-0506 (electronic), 2160-9306",
ISSN-L = "1077-2626",
bibdate = "Thu Jun 29 18:38:25 MDT 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetransviscomputgraph.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.computer.org/csdl/trans/tg/2017/06/07433468-abs.html",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Visualization and Computer
Graphics",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=2945",
}
@Article{Han:2017:SLS,
  author =       "Yiming Han and Anthony T. Chronopoulos",
  title =        "Scalable Loop Self-scheduling Schemes for Large-Scale
                 Clusters and Cloud Systems",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "45",
  number =       "3",
  pages =        "595--611",
  month =        jun,
  year =         "2017",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-016-0434-5",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Sat Jun 24 11:37:59 MDT 2017",
  bibsource =    "http://link.springer.com/journal/10766/45/3;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Haque:2017:CCL,
  author =       "S. Anisul Haque and X. Li and F. Mansouri and M.
                 Moreno Maza and D. Mohajerani and W. Pan",
  title =        "{CUMODP}: a {CUDA} library for modular polynomial
                 computation",
  journal =      j-ACM-COMM-COMP-ALGEBRA,
  volume =       "51",
  number =       "3",
  pages =        "89--91",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3177795.3177799",
  ISSN =         "1932-2232 (print), 1932-2240 (electronic)",
  ISSN-L =       "1932-2232",
  bibdate =      "Fri Jan 5 06:22:51 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigsam.bib",
  abstract =     "The CUDA Modular Polynomial (CUMODP) Library
                 implements arithmetic operations for dense matrices and
                 dense polynomials, primarily with modular integer
                 coefficients. Some operations are available for integer
                 or floating point coefficients. Similar to other
                 software libraries, like CuBLAS$^1$ targeting Graphics
                 Processing Units (GPUs), CUMODP focuses on
                 efficiency-critical routines and provides them in the
                 form of device functions and CUDA kernels. Hence, these
                 routines are primarily designed to offer GPU support to
                 polynomial system solvers. A bivariate system solver is
                 part of the library, as a proof-of-concept. Its
                 implementation is presented in [10] and it is
                 integrated in Maple's Triangularize command$^2$, since
                 the release 18 of Maple.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM Communications in Computer Algebra",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J1000",
}
@Article{Hasanov:2017:HRC,
  author =       "Khalid Hasanov and Alexey Lastovetsky",
  title =        "Hierarchical redesign of classic {MPI} reduction
                 algorithms",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "73",
  number =       "2",
  pages =        "713--725",
  month =        feb,
  year =         "2017",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-016-1779-7",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jun 24 10:31:32 MDT 2017",
  bibsource =    "http://link.springer.com/journal/11227/73/2;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Henriksen:2017:FPF,
  author =       "Troels Henriksen and Niels G. W. Serup and Martin
                 Elsman and Fritz Henglein and Cosmin E. Oancea",
  title =        "{Futhark}: purely functional {GPU-programming} with
                 nested parallelism and in-place array updates",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "6",
  pages =        "556--571",
  month =        jun,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3140587.3062354",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:17 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Futhark is a purely functional data-parallel array
                 language that offers a machine-neutral programming
                 model and an optimising compiler that generates OpenCL
                 code for GPUs. This paper presents the design and
                 implementation of three key features of Futhark that
                 seek a suitable middle ground with imperative
                 approaches. First, in order to express efficient code
                 inside the parallel constructs, we introduce a simple
                 type system for in-place updates that ensures
                 referential transparency and supports equational
                 reasoning. Second, we furnish Futhark with parallel
                 operators capable of expressing efficient
                 strength-reduced code, along with their fusion rules.
                 Third, we present a flattening transformation aimed at
                 enhancing the degree of parallelism that (i) builds on
                 loop interchange and distribution but uses higher-order
                 reasoning rather than array-dependence analysis, and
                 (ii) still allows further locality-of-reference
                 optimisations. Finally, an evaluation on 16 benchmarks
                 demonstrates the impact of the language and compiler
                 features and shows application-level performance
                 competitive with hand-written GPU code.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PLDI '17 conference proceedings.",
}
@Article{Jan:2017:ITF,
author = "Bilal Jan and Fiaz Gul Khan and Bartolomeo Montrucchio
and Anthony Theodore Chronopoulos and Shahaboddin
Shamshirband and Abdul Nasir Khan",
title = "Introducing {ToPe-FFT}: An {OpenCL}-based {FFT}
library targeting {GPUs}",
journal = j-CCPE,
volume = "29",
number = "21",
pages = "??--??",
day = "10",
month = nov,
year = "2017",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.4256",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Sat Dec 30 09:11:58 MST 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Jarzabek:2017:PEU,
  author =       "Lukasz Jarzabek and Pawel Czarnul",
  title =        "Performance evaluation of unified memory and dynamic
                 parallelism for selected parallel {CUDA} applications",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "73",
  number =       "12",
  pages =        "5378--5401",
  month =        dec,
  year =         "2017",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-017-2091-x",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jan 6 08:59:18 MST 2018",
  bibsource =    "http://link.springer.com/journal/11227/73/12;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/content/pdf/10.1007/s11227-017-2091-x.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Jatala:2017:SSG,
  author =       "Vishwesh Jatala and Jayvant Anantpur and Amey
                 Karkare",
  title =        "Scratchpad Sharing in {GPUs}",
  journal =      j-TACO,
  volume =       "14",
  number =       "2",
  pages =        "15:1--15:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3075619",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Mon Jul 24 18:00:59 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "General-Purpose Graphics Processing Unit (GPGPU)
                 applications exploit on-chip scratchpad memory
                 available in the Graphics Processing Units (GPUs) to
                 improve performance. The amount of thread level
                 parallelism (TLP) present in the GPU is limited by the
                 number of resident threads, which in turn depends on
                 the availability of scratchpad memory in its streaming
                 multiprocessor (SM). Since the scratchpad memory is
                 allocated at thread block granularity, part of the
                 memory may remain unutilized. In this article, we
                 propose architectural and compiler optimizations to
                 improve the scratchpad memory utilization. Our
                 approach, called Scratchpad Sharing, addresses
                 scratchpad under-utilization by launching additional
                 thread blocks in each SM. These thread blocks use
                 unutilized scratchpad memory and also share scratchpad
                 memory with other resident blocks. To improve the
                 performance of scratchpad sharing, we propose Owner
                 Warp First (OWF) scheduling that schedules warps from
                 the additional thread blocks effectively. The
                 performance of this approach, however, is limited by
                 the availability of the part of scratchpad memory that
                 is shared among thread blocks. We propose compiler
                 optimizations to improve the availability of shared
                 scratchpad memory. We describe an allocation scheme
                 that helps in allocating scratchpad variables such that
                 shared scratchpad is accessed for short duration. We
                 introduce a new hardware instruction, relssp, that when
                 executed releases the shared scratchpad memory.
                 Finally, we describe an analysis for optimal placement
                 of relssp instructions, such that shared scratchpad
                 memory is released as early as possible, but only after
                 its last use, along every execution path. We
                 implemented the hardware changes required for
                 scratchpad sharing and the relssp instruction using the
                 GPGPU-Sim simulator and implemented the compiler
                 optimizations in Ocelot framework. We evaluated the
                 effectiveness of our approach on 19 kernels from 3
                 benchmarks suites: CUDA-SDK, GPGPU-Sim, and Rodinia.
                 The kernels that under-utilize scratchpad memory show
                 an average improvement of 19\% and maximum improvement
                 of 92.17\% in terms of the number of instruction
                 executed per cycle when compared to the baseline
                 approach, without affecting the performance of the
                 kernels that are not limited by scratchpad memory.",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Jo:2017:PMA,
  author =       "Gangwon Jo and Jaehoon Jung and Jiyoung Park and
                 Jaejin Lee",
  title =        "{Poster}: {MAPA}: an Automatic Memory Access Pattern
                 Analyzer for {GPU} Applications",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "8",
  pages =        "443--444",
  month =        aug,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3155284.3019034",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Dec 1 18:56:12 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Various existing optimization and memory consistency
                 management techniques for GPU applications rely on
                 memory access patterns of kernels. However, they suffer
                 from poor practicality because they require explicit
                 user interventions to extract kernel memory access
                 patterns. This paper proposes an automatic
                 memory-access-pattern analysis framework called MAPA.
                 MAPA is based on a source-level analysis technique
                 derived from traditional symbolic analyses and a
                 run-time pattern selection technique. The experimental
                 results show that MAPA properly analyzes 116 real-world
                 OpenCL kernels from Rodinia and Parboil.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '17 conference proceedings.",
}
@Article{Julian-Moreno:2017:FPA,
  author =       "Guillermo Juli{\'a}n-Moreno and Jorge E. L{\'o}pez de
                 Vergara and Iv{\'a}n Gonz{\'a}lez and Luis de Pedro and
                 Javier Royuela-del-Val and Federico
                 Simmross-Wattenberg",
  title =        "Fast parallel $ \alpha $-stable distribution function
                 evaluation and parameter estimation using {OpenCL} in
                 {GPGPUs}",
  journal =      j-STAT-COMPUT,
  volume =       "27",
  number =       "5",
  pages =        "1365--1382",
  month =        sep,
  year =         "2017",
  CODEN =        "STACE3",
  ISSN =         "0960-3174 (print), 1573-1375 (electronic)",
  ISSN-L =       "0960-3174",
  bibdate =      "Thu Jun 8 18:03:56 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/statcomput.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Statistics and Computing",
  journal-URL =  "http://link.springer.com/journal/11222",
}
@Article{Katouda:2017:MOH,
  author =       "Michio Katouda and Takahito Nakajima",
  title =        "{MPI\slash OpenMP} hybrid parallel algorithm for
                 resolution of identity second-order
                 {M{\o}ller--Plesset} perturbation calculation of
                 analytical energy gradient for massively parallel
                 multicore supercomputers",
  journal =      j-J-COMPUT-CHEM,
  volume =       "38",
  number =       "8",
  pages =        "489--507",
  day =          "30",
  month =        mar,
  year =         "2017",
  CODEN =        "JCCHDD",
  DOI =          "https://doi.org/10.1002/jcc.24701",
  ISSN =         "0192-8651 (print), 1096-987X (electronic)",
  ISSN-L =       "0192-8651",
  bibdate =      "Mon Feb 20 11:51:05 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jcomputchem2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Chemistry",
  journal-URL =  "http://www.interscience.wiley.com/jpages/0192-8651",
}
@Article{Khan:2017:RCS,
  author =       "Ayaz H. Khan and Mayez Al-Mouhamed and Muhammed
                 Al-Mulhem and Adel F. Ahmed",
  title =        "{RT-CUDA}: A Software Tool for {CUDA} Code
                 Restructuring",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "45",
  number =       "3",
  pages =        "551--594",
  month =        jun,
  year =         "2017",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-016-0433-6",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Sat Jun 24 11:37:59 MDT 2017",
  bibsource =    "http://link.springer.com/journal/10766/45/3;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Kojima:2017:HLG,
  author =       "Kensuke Kojima and Atsushi Igarashi",
  title =        "A {Hoare} Logic for {GPU} Kernels",
  journal =      j-TOCL,
  volume =       "18",
  number =       "1",
  pages =        "3:1--3:??",
  month =        apr,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3001834",
  ISSN =         "1529-3785 (print), 1557-945X (electronic)",
  ISSN-L =       "1529-3785",
  bibdate =      "Thu Apr 13 17:53:54 MDT 2017",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocl/;
                 http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tocl.bib",
  abstract =     "We study a Hoare Logic to reason about parallel
                 programs executed on graphics processing units (GPUs),
                 called GPU kernels. During the execution of GPU
                 kernels, multiple threads execute in lockstep, that is,
                 execute the same instruction simultaneously. When the
                 control branches, the two branches are executed
                 sequentially, but during the execution of each branch
                 only those threads that take it are enabled; after the
                 control converges, all the threads are enabled and
                 again execute in lockstep. In this article, we first
                 consider a semantics in which all threads execute in
                 lockstep (this semantics simplifies the actual
                 execution model of GPUs) and adapt Hoare Logic to this
                 setting by augmenting the usual Hoare triples with an
                 additional component representing the set of enabled
                 threads. It is determined that the soundness and
                 relative completeness of the logic do not hold for all
                 programs; a difficulty arises from the fact that one
                 thread can invalidate the loop termination condition of
                 another thread through shared memory. We overcome this
                 difficulty by identifying an appropriate class of
                 programs for which the soundness and relative
                 completeness hold. Additionally, we discuss thread
                 interleaving, which is present in the actual execution
                 of GPUs but not in the lockstep semantics mentioned
                 above. We show that if a program is race free, then the
                 lockstep and interleaving semantics produce the same
                 result. This implies that our logic is sound and
                 relatively complete for race-free programs, even if the
                 thread interleaving is taken into account.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Computational Logic",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J773",
}
@Article{Kotselidis:2017:HMR,
  author =       "Christos Kotselidis and James Clarkson and Andrey
                 Rodchenko and Andy Nisbet and John Mawer and Mikel
                 Luj{\'a}n",
  title =        "Heterogeneous Managed Runtime Systems: a Computer
                 Vision Case Study",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "7",
  pages =        "74--82",
  month =        jul,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3140607.3050764",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:17 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Real-time 3D space understanding is becoming prevalent
                 across a wide range of applications and hardware
                 platforms. To meet the desired Quality of Service
                 (QoS), computer vision applications tend to be heavily
                 parallelized and exploit any available hardware
                 accelerators. Current approaches to achieving real-time
                 computer vision, evolve around programming languages
                 typically associated with High Performance Computing
                 along with binding extensions for OpenCL or CUDA
                 execution. Such implementations, although high
                 performing, lack portability across the wide range of
                 diverse hardware resources and accelerators. In this
                 paper, we showcase how a complex computer vision
                 application can be implemented within a managed runtime
                 system. We discuss the complexities of achieving
                 high-performing and portable execution across embedded
                 and desktop configurations. Furthermore, we demonstrate
                 that it is possible to achieve the QoS target of over
                 30 frames per second (FPS) by exploiting FPGA and GPGPU
                 acceleration transparently through the managed runtime
                 system.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "VEE '17 conference proceedings.",
}
@Article{Kouetcha:2017:USP,
  author =       "Daniella Nguemalieu Kouetcha and Hamidr{\'e}za
                 Ram{\'e}zani and Nathalie Cohaut",
  title =        "Ultrafast scalable parallel algorithm for the radial
                 distribution function histogramming using {MPI} maps",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "73",
  number =       "4",
  pages =        "1629--1653",
  month =        apr,
  year =         "2017",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-016-1854-0",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jun 24 10:31:33 MDT 2017",
  bibsource =    "http://link.springer.com/journal/11227/73/4;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Li:2017:PCO,
  author =       "Shigang Li and Yunquan Zhang and Torsten Hoefler",
  title =        "{Poster}: Cache-Oblivious {MPI} All-to-All
                 Communications on Many-Core Architectures",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "8",
  pages =        "445--446",
  month =        aug,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3155284.3019025",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Dec 1 18:56:12 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "In the many-core era, the performance of MPI
                 collectives is more dependent on the intra-node
                 communication component. However, the communication
                 algorithms generally inherit from the inter-node
                 version and ignore the cache complexity. We propose
                 cache-oblivious algorithms for MPI all-to-all
                 operations, in which data blocks are copied into the
                 receive buffers in Morton order to exploit data
                 locality. Experimental results on different many-core
                 architectures show that our cache-oblivious
                 implementations significantly outperform the naive
                 implementations based on shared heap and the highly
                 optimized MPI libraries.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '17 conference proceedings.",
}
@Article{Losada:2017:ARV,
  author =       "Nuria Losada and Mar{\'\i}a J. Mart{\'\i}n and
                 Patricia Gonz{\'a}lez",
  title =        "Assessing resilient versus stop-and-restart
                 fault-tolerant solutions in {MPI} applications",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "73",
  number =       "1",
  pages =        "316--329",
  month =        jan,
  year =         "2017",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-016-1863-z",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jun 24 10:31:31 MDT 2017",
  bibsource =    "http://link.springer.com/journal/11227/73/1;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Losada:2017:RMA,
  author =       "Nuria Losada and Iv{\'a}n Cores and Mar{\'\i}a J.
                 Mart{\'\i}n and Patricia Gonz{\'a}lez",
  title =        "Resilient {MPI} applications using an
                 application-level checkpointing framework and {ULFM}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "73",
  number =       "1",
  pages =        "100--113",
  month =        jan,
  year =         "2017",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-016-1629-7",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jun 24 10:31:31 MDT 2017",
  bibsource =    "http://link.springer.com/journal/11227/73/1;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Maier:2017:OLD,
  author =       "Andrew J. Maier and Bruce F. Cockburn",
  title =        "Optimization of Low-Density Parity Check decoder
                 performance for {OpenCL} designs synthesized to
                 {FPGAs}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "107",
  number =       "??",
  pages =        "134--145",
  month =        sep,
  year =         "2017",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Sat Aug 19 13:10:31 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731517301004",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Malakar:2017:DMO,
  author =       "Preeti Malakar and Venkatram Vishwanath",
  title =        "Data movement optimizations for independent {MPI}
                 {I/O} on the {Blue Gene/Q}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "61",
  number =       "??",
  pages =        "35--51",
  month =        jan,
  year =         "2017",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Sat Feb 4 08:48:35 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S016781911630062X",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191/",
}
@Article{Manwade:2017:DFA,
  author =       "Karveer B. Manwade and Dinesh B. Kulkarni",
  title =        "Data Flow Analysis of {MPI} Program Using Dynamic
                 Analysis Technique with Partial Execution",
  journal =      j-SCPE,
  volume =       "18",
  number =       "4",
  pages =        "375--385",
  month =        "????",
  year =         "2017",
  CODEN =        "????",
  ISSN =         "1895-1767",
  ISSN-L =       "1895-1767",
  bibdate =      "Mon Jan 7 06:46:49 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/scpe.bib",
  URL =          "https://www.scpe.org/index.php/scpe/article/view/1335",
  acknowledgement = ack-nhfb,
  fjournal =     "Scalable Computing: Practice and Experience",
  journal-URL =  "http://www.scpe.org/",
}
@Article{Marin:2017:ERF,
  author =       "Manuel Marin and David Defour and Federico Milano",
  title =        "An Efficient Representation Format for Fuzzy Intervals
                 Based on Symmetric Membership Functions",
  journal =      j-TOMS,
  volume =       "43",
  number =       "3",
  pages =        "23:1--23:??",
  month =        jan,
  year =         "2017",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/2939364",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Wed Oct 4 10:55:07 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=2939364",
  abstract =     "This article addresses the execution cost of
                 arithmetic operations with a focus on fuzzy arithmetic.
                 Thanks to an appropriate representation format for
                 fuzzy intervals, we show that it is possible to halve
                 the number of operations and divide by 2 to 8 the
                 memory requirements compared to conventional solutions.
                 In addition, we demonstrate the benefit of some
                 hardware features encountered in today's accelerators
                 (GPU) such as static rounding, memory usage,
                 instruction-level parallelism (ILP), and thread-level
                 parallelism (TLP). We then describe a library of fuzzy
                 arithmetic operations written in CUDA and C++. The
                 library is evaluated against traditional approaches
                 using compute-bound and memory-bound benchmarks on
                 Nvidia GPUs, with an observed performance gain of 2 to
                 20.",
  acknowledgement = ack-nhfb,
  articleno =    "23",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Matheou:2017:DDC,
  author =       "George Matheou and Paraskevas Evripidou",
  title =        "Data-Driven Concurrency for High Performance
                 Computing",
  journal =      j-TACO,
  volume =       "14",
  number =       "4",
  pages =        "53:1--53:??",
  month =        dec,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3162014",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Fri Dec 22 18:25:55 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "In this work, we utilize dynamic dataflow/data-driven
                 techniques to improve the performance of high
                 performance computing (HPC) systems. The proposed
                 techniques are implemented and evaluated through an
                 efficient, portable, and robust programming framework
                 that enables data-driven concurrency on HPC systems.
                 The proposed framework is based on data-driven
                 multithreading (DDM), a hybrid control-flow/dataflow
                 model that schedules threads based on data availability
                 on sequential processors. The proposed framework was
                 evaluated using several benchmarks, with different
                 characteristics, on two different systems: a 4-node AMD
                 system with a total of 128 cores and a 64-node Intel
                 HPC system with a total of 768 cores. The performance
                 evaluation shows that the proposed framework scales
                 well and tolerates scheduling overheads and memory
                 latencies effectively. We also compare our framework to
                 MPI, DDM-VM, and OmpSs@Cluster. The comparison results
                 show that the proposed framework obtains comparable or
                 better performance.",
  acknowledgement = ack-nhfb,
  articleno =    "53",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Meister:2017:PME,
  author =       "Oliver Meister and Kaveh Rahnema and Michael Bader",
  title =        "Parallel Memory-Efficient Adaptive Mesh Refinement on
                 Structured Triangular Meshes with Billions of Grid
                 Cells",
  journal =      j-TOMS,
  volume =       "43",
  number =       "3",
  pages =        "19:1--19:27",
  month =        jan,
  year =         "2017",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/2947668",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Wed Oct 4 10:55:07 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=2947668",
  abstract =     "We present sam(oa)$^2$, a software package for a
                 dynamically adaptive, parallel solution of 2D partial
                 differential equations on triangular grids created via
                 newest vertex bisection. An element order imposed by
                 the Sierpinski space-filling curve provides an
                 algorithm for grid generation, refinement, and
                 traversal that is inherently memory efficient. Based
                 purely on stack and stream data structures, it
                 completely avoids random memory access. Using an
                 element-oriented data view suitable for local
                 operators, concrete simulation scenarios are
                 implemented based on control loops and event hooks,
                 which hide the complexity of the underlying traversal
                 scheme. Two case studies are presented: two-phase flow
                 in heterogeneous porous media and tsunami wave
                 propagation, demonstrated on the Tohoku tsunami 2011 in
                 Japan. sam(oa)$^2$ features hybrid MPI+OpenMP
                 parallelization based on the Sierpinski order induced
                 on the elements. Sections defined by contiguous grid
                 cells define atomic tasks for OpenMP work sharing and
                 stealing, as well as for migration of grid cells
                 between MPI processes. Using optimized communication
                 and load balancing algorithms, sam(oa)$^2$ achieves 88\%
                 strong scaling efficiency from 16 to 512 cores and 92\%
                 efficiency in a weak scaling test on 8,192 cores with
                 10 billion elements---all tests including adaptive mesh
                 refinement and load balancing in each time step.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Mendonca:2017:DAA,
  author =       "Gleison Mendon{\c{c}}a and Breno Guimar{\~a}es and
                 P{\'e}ricles Alves and M{\'a}rcio Pereira and Guido
                 Ara{\'u}jo and Fernando Magno Quint{\~a}o Pereira",
  title =        "{DawnCC}: Automatic Annotation for Data Parallelism
                 and Offloading",
  journal =      j-TACO,
  volume =       "14",
  number =       "2",
  pages =        "13:1--13:??",
  month =        jul,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084540",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Mon Jul 24 18:00:59 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Directive-based programming models, such as OpenACC
                 and OpenMP, allow developers to convert a sequential
                 program into a parallel one with minimum human
                 intervention. However, inserting pragmas into
                 production code is a difficult and error-prone task,
                 often requiring familiarity with the target program.
                 This difficulty restricts the ability of developers to
                 annotate code that they have not written themselves.
                 This article provides a suite of compiler-related
                 methods to mitigate this problem. Such techniques rely
                 on symbolic range analysis, a well-known static
                 technique, to achieve two purposes: populate source
                 code with data transfer primitives and to disambiguate
                 pointers that could hinder automatic parallelization
                 due to aliasing. We have materialized our ideas into a
                 tool, DawnCC, which can be used stand-alone or through
                 an online interface. To demonstrate its effectiveness,
                 we show how DawnCC can annotate the programs available
                 in PolyBench without any intervention from users. Such
                 annotations lead to speedups of over $ 100 \times $ in
                 an Nvidia architecture and over $ 50 \times $ in an ARM
                 architecture.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Montella:2017:VCB,
  author =       "Raffaele Montella and Giulio Giunta and Giuliano
                 Laccetti and Marco Lapegna and Carlo Palmieri and
                 Carmine Ferraro and Valentina Pelliccia and Cheol-Ho
                 Hong and Ivor Spence and Dimitrios S. Nikolopoulos",
  title =        "On the Virtualization of {CUDA} Based {GPU} Remoting
                 on {ARM} and x86 Machines in the {GVirtuS} Framework",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "45",
  number =       "5",
  pages =        "1142--1163",
  month =        oct,
  year =         "2017",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-016-0462-1",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Sat Nov 18 09:27:28 MST 2017",
  bibsource =    "http://link.springer.com/journal/10766/45/5;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Moreira:2017:FCR,
  author =       "Rubens E. A. Moreira and Sylvain Collange and Fernando
                 Magno Quint{\~a}o Pereira",
  title =        "Function Call Re-Vectorization",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "8",
  pages =        "313--326",
  month =        aug,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3155284.3018751",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Dec 1 18:56:12 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Programming languages such as C for CUDA, OpenCL or
                 ISPC have contributed to increase the programmability
                 of SIMD accelerators and graphics processing units.
                 However, these languages still lack the flexibility
                 offered by low-level SIMD programming on explicit
                 vectors. To close this expressiveness gap while
                 preserving performance, this paper introduces the
                 notion of Call Re-Vectorization (CREV). CREV allows
                 changing the dimension of vectorization during the
                 execution of a kernel, exposing it as a nested parallel
                 kernel call. CREV affords programmability close to
                 dynamic parallelism, a feature that allows the
                 invocation of kernels from inside kernels, but at much
                 lower cost. In this paper, we present a formal
                 semantics of CREV, and an implementation of it on the
                 ISPC compiler. We have used CREV to implement some
                 classic algorithms, including string matching, depth
                 first search and Bellman-Ford, with minimum effort.
                 These algorithms, once compiled by ISPC to Intel-based
                 vector instructions, are as fast as state-of-the-art
                 implementations, yet much simpler. Thus, CREV gives
                 developers the elegance of dynamic programming, and the
                 performance of explicit SIMD programming.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '17 conference proceedings.",
}
@Article{Mossaiby:2017:OIH,
  author =       "F. Mossaiby and A. Shojaei and M. Zaccariotto and U.
                 Galvanetto",
  title =        "{OpenCL} implementation of a high performance {$3$D}
                 Peridynamic model on graphics accelerators",
  journal =      j-COMPUT-MATH-APPL,
  volume =       "74",
  number =       "8",
  pages =        "1856--1870",
  day =          "15",
  month =        oct,
  year =         "2017",
  CODEN =        "CMAPDK",
  ISSN =         "0898-1221 (print), 1873-7668 (electronic)",
  ISSN-L =       "0898-1221",
  bibdate =      "Sat Jan 13 11:04:24 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/computmathappl2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0898122117304030",
  acknowledgement = ack-nhfb,
  fjournal =     "Computers and Mathematics with Applications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/08981221",
}
@Article{Neugebauer:2017:PAR,
  author =       "Olaf Neugebauer and Michael Engel and Peter Marwedel",
  title =        "A parallelization approach for resource-restricted
                 embedded heterogeneous {MPSoCs} inspired by {OpenMP}",
  journal =      j-J-SYST-SOFTW,
  volume =       "125",
  number =       "??",
  pages =        "439--448",
  month =        mar,
  year =         "2017",
  CODEN =        "JSSODM",
  ISSN =         "0164-1212 (print), 1873-1228 (electronic)",
  ISSN-L =       "0164-1212",
  bibdate =      "Sat Feb 4 12:20:39 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jsystsoftw.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0164121216301534",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Systems and Software",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01641212/",
}
@Article{Nguyen:2017:ATM,
  author =       "Tan Nguyen and Pietro Cicotti and Eric Bylaska and Dan
                 Quinlan and Scott Baden",
  title =        "Automatic translation of {MPI} source into a
                 latency-tolerant, data-driven form",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "106",
  number =       "??",
  pages =        "1--13",
  month =        aug,
  year =         "2017",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Sat Aug 19 13:10:31 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731517300771",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Omar:2017:PSF,
  author =       "Cyrus Omar and Jonathan Aldrich",
  title =        "Programmable semantic fragments: the design and
                 implementation of {\tt typy}",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "3",
  pages =        "81--92",
  month =        mar,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3093335.2993245",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:15 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "This paper introduces typy, a statically typed
                 programming language embedded by reflection into
                 Python. typy features a fragmentary semantics, i.e. it
                 delegates semantic control over each term, drawn from
                 Python's fixed concrete and abstract syntax, to some
                 contextually relevant user-defined semantic fragment.
                 The delegated fragment programmatically (1) typechecks
                 the term (following a bidirectional protocol); and (2)
                 assigns dynamic meaning to the term by computing a
                 translation to Python. We argue that this design is
                 expressive with examples of fragments that express the
                 static and dynamic semantics of (1) functional records;
                 (2) labeled sums (with nested pattern matching a la
                 ML); (3) a variation on JavaScript's prototypal object
                 system; and (4) typed foreign interfaces to Python and
                 OpenCL. These semantic structures are, or would need to
                 be, defined primitively in conventionally structured
                 languages. We further argue that this design is
                 compositionally well-behaved. It avoids the expression
                 problem and the problems of grammar composition because
                 the syntax is fixed. Moreover, programs are
                 semantically stable under fragment composition (i.e.
                 defining a new fragment will not change the meaning of
                 existing program components.)",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "GPCE '16 conference proceedings.",
}
@Article{Pereira:2017:SBC,
  author =       "Phillipe Pereira and Higo Albuquerque and Isabela da
                 Silva and Hendrio Marques and Felipe Monteiro and
                 Ricardo Ferreira and Lucas Cordeiro",
  title =        "{SMT}-based context-bounded model checking for {CUDA}
                 programs",
  journal =      j-CCPE,
  volume =       "29",
  number =       "22",
  pages =        "??--??",
  day =          "25",
  month =        nov,
  year =         "2017",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.3934",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Dec 30 09:11:59 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Qawasmeh:2017:PPR,
  author =       "Ahmad Qawasmeh and Maxime R. Hugues and Henri Calandra
                 and Barbara M. Chapman",
  title =        "Performance portability in reverse time migration and
                 seismic modelling via {OpenACC}",
  journal =      j-IJHPCA,
  volume =       "31",
  number =       "5",
  pages =        "422--440",
  month =        sep,
  year =         "2017",
  CODEN =        "IHPCFL",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Sat Jan 6 10:31:59 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}
@Article{Rathgeber:2017:FAF,
  author =       "Florian Rathgeber and David A. Ham and Lawrence
                 Mitchell and Michael Lange and Fabio Luporini and
                 Andrew T. T. McRae and Gheorghe-Teodor Bercea and
                 Graham R. Markall and Paul H. J. Kelly",
  title =        "{Firedrake}: Automating the Finite Element Method by
                 Composing Abstractions",
  journal =      j-TOMS,
  volume =       "43",
  number =       "3",
  pages =        "24:1--24:??",
  month =        jan,
  year =         "2017",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/2998441",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Wed Oct 4 10:55:07 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/python.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=2998441",
  abstract =     "Firedrake is a new tool for automating the numerical
                 solution of partial differential equations. Firedrake
                 adopts the domain-specific language for the finite
                 element method of the FEniCS project, but with a pure
                 Python runtime-only implementation centered on the
                 composition of several existing and new abstractions
                 for particular aspects of scientific computing. The
                 result is a more complete separation of concerns that
                 eases the incorporation of separate contributions from
                 computer scientists, numerical analysts, and
                 application specialists. These contributions may add
                 functionality or improve performance. Firedrake
                 benefits from automatically applying new optimizations.
                 This includes factorizing mixed function spaces,
                 transforming and vectorizing inner loops, and
                 intrinsically supporting block matrix operations.
                 Importantly, Firedrake presents a simple public API for
                 escaping the UFL abstraction. This allows users to
                 implement common operations that fall outside of pure
                 variational formulations, such as flux limiters.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Rejitha:2017:EPC,
  author =       "R. S. Rejitha and Shajulin Benedict and Suja A. Alex
                 and Shany Infanto",
  title =        "Energy prediction of {CUDA} application instances
                 using dynamic regression models",
  journal =      j-COMPUTING,
  volume =       "99",
  number =       "8",
  pages =        "765--790",
  month =        aug,
  year =         "2017",
  CODEN =        "CMPTA2",
  DOI =          "https://doi.org/10.1007/s00607-016-0534-5",
  ISSN =         "0010-485X (print), 1436-5057 (electronic)",
  ISSN-L =       "0010-485X",
  bibdate =      "Fri Feb 9 14:54:09 MST 2018",
  bibsource =    "http://link.springer.com/journal/607/99/8;
                 http://www.math.utah.edu/pub/tex/bib/computing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing",
  journal-URL =  "http://link.springer.com/journal/607",
}
@Article{Rizzardi:2017:ATS,
  author =       "Mariarosaria Rizzardi",
  title =        "{Algorithm 981}: {Talbot Suite DE}: Application of
                 Modified {Talbot}'s Method to Solve Differential
                 Problems",
  journal =      j-TOMS,
  volume =       "44",
  number =       "2",
  pages =        "18:1--18:23",
  month =        sep,
  year =         "2017",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/3089248",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Tue Sep 19 17:19:59 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/matlab.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3089248",
  abstract =     "In order to solve a differential problem, the Laplace
                 Transform method, when applicable, replaces the problem
                 with a simpler one; the solution is obtained by solving
                 the new problem and then by computing the inverse
                 Laplace Transform of this function. In a numerical
                 context, since the solution of the transformed problem
                 consists of a sequence of Laplace Transform samples,
                 most of the software for the numerical inversion cannot
                 be used since the transform, among parameters, must be
                 passed as a function. To fill this gap, we present
                 Talbot Suite DE, a C software collection for Laplace
                 Transform inversions, specifically designed for these
                 problems and based on Talbot's method. It contains both
                 sequential and parallel implementations; the latter is
                 accomplished by means of OpenMP. We also report some
                 performance results. Aimed at non-expert users, the
                 software is equipped with several examples and a User
                 Guide that includes the external documentation,
                 explains how to use all the sample code, and reports
                 its results about accuracy and efficiency. Some
                 examples are entirely in C and others combine different
                 programming languages (C/MATLAB, C/FORTRAN). The User
                 Guide also contains useful hints to avoid possible
                 errors issued during the compilation or execution of
                 mixed-language code.",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Russo:2017:MPG,
  author =       "Igor L. S. Russo and Heder S. Bernardino and Helio J.
                 C. Barbosa",
  title =        "A massively parallel Grammatical Evolution technique
                 with {OpenCL}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "109",
  number =       "??",
  pages =        "333--349",
  month =        nov,
  year =         "2017",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Sat Aug 19 13:10:32 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S074373151730206X",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Sato:2017:NIT,
  author =       "Kento Sato and Dong H. Ahn and Ignacio Laguna and
                 Gregory L. Lee and Martin Schulz and Christopher M.
                 Chambreau",
  title =        "Noise Injection Techniques to Expose Subtle and
                 Unintended Message Races",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "8",
  pages =        "89--101",
  month =        aug,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3155284.3018767",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Dec 1 18:56:12 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Debugging intermittently occurring bugs within MPI
                 applications is challenging, and message races, a
                 condition in which two or more sends race to match with
                 a receive, are one of the common root causes. Many
                 debugging tools have been proposed to help programmers
                 resolve them, but their runtime interference perturbs
                 the timing such that subtle races often cannot be
                 reproduced with debugging tools. We present novel noise
                 injection techniques to expose message races even under
                 a tool's control. We first formalize this race problem
                 in the context of non-deterministic parallel
                 applications and use this analysis to determine an
                 effective noise-injection strategy to uncover them. We
                 codified these techniques in NINJA (Noise INJection
                 Agent) that exposes these races without modification to
                 the application. Our evaluations on synthetic cases as
                 well as a real-world bug in Hypre-2.10.1 show that
                 NINJA significantly helps expose races.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '17 conference proceedings.",
}
@Article{Schardl:2017:TEF,
  author =       "Tao B. Schardl and William S. Moses and Charles E.
                 Leiserson",
  title =        "{Tapir}: Embedding Fork-Join Parallelism into {LLVM}'s
                 Intermediate Representation",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "8",
  pages =        "249--265",
  month =        aug,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3155284.3018758",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Dec 1 18:56:12 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "This paper explores how fork-join parallelism, as
                 supported by concurrency platforms such as Cilk and
                 OpenMP, can be embedded into a compiler's intermediate
                 representation (IR). Mainstream compilers typically
                 treat parallel linguistic constructs as syntactic sugar
                 for function calls into a parallel runtime. These calls
                 prevent the compiler from performing optimizations
                 across parallel control constructs. Remedying this
                 situation is generally thought to require an extensive
                 reworking of compiler analyses and code transformations
                 to handle parallel semantics. Tapir is a compiler IR
                 that represents logically parallel tasks asymmetrically
                 in the program's control flow graph. Tapir allows the
                 compiler to optimize across parallel control constructs
                 with only minor changes to its existing analyses and
                 code transformations. To prototype Tapir in the LLVM
                 compiler, for example, we added or modified about 6000
                 lines of LLVM's 4-million-line codebase. Tapir enables
                 LLVM's existing compiler optimizations for serial code
                 --- including loop-invariant-code motion,
                 common-subexpression elimination, and tail-recursion
                 elimination --- to work with parallel control
                 constructs such as spawning and parallel loops. Tapir
                 also supports parallel optimizations such as loop
                 scheduling.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '17 conference proceedings.",
}
@Article{Schmitt:2017:SCP,
  author =       "Felix Schmitt and Robert Dietrich and Guido
                 Juckeland",
  title =        "Scalable critical-path analysis and optimization
                 guidance for hybrid {MPI--CUDA} applications",
  journal =      j-IJHPCA,
  volume =       "31",
  number =       "6",
  pages =        "485--498",
  month =        nov,
  year =         "2017",
  CODEN =        "IHPCFL",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Sat Jan 6 10:31:59 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "http://hpc.sagepub.com/content/by/year",
}
@Article{Sharma:2017:PDR,
  author =       "Prateek Sharma and David Irwin and Prashant Shenoy",
  title =        "Portfolio-driven Resource Management for Transient
                 Cloud Servers",
  journal =      j-POMACS,
  volume =       "1",
  number =       "1",
  pages =        "5:1--5:??",
  month =        jun,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3084442",
  ISSN =         "2476-1249",
  ISSN-L =       "2476-1249",
  bibdate =      "Fri Jun 16 09:11:52 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pomacs.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://dl.acm.org/citation.cfm?id=3084442",
  abstract =     "Cloud providers have begun to offer their surplus
                 capacity in the form of low-cost transient servers,
                 which can be revoked unilaterally at any time. While
                 the low cost of transient servers makes them attractive
                 for a wide range of applications, such as data
                 processing and scientific computing, failures due to
                 server revocation can severely degrade application
                 performance. Since different transient server types
                 offer different cost and availability tradeoffs, we
                 present the notion of server portfolios that is based
                 on financial portfolio modeling. Server portfolios
                 enable construction of an ``optimal'' mix of servers to
                 meet an application's sensitivity to cost and
                 revocation risk. We implement model-driven portfolios
                 in a system called ExoSphere, and show how diverse
                 applications can use portfolios and
                 application-specific policies to gracefully handle
                 transient servers. We show that ExoSphere enables
                 widely-used parallel applications such as Spark, MPI,
                 and BOINC to be made transiency-aware with modest
                 effort. Our experiments show that allowing the
                 applications to use suitable transiency-aware policies,
                 ExoSphere is able to achieve 80\% cost savings when
                 compared to on-demand servers and greatly reduces
                 revocation risk compared to existing approaches.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "Proceedings of the ACM on Measurement and Analysis of
                 Computing Systems (POMACS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1567",
}
%%% NOTE(review): the article number e4072 is taken from the DOI suffix;
%%% the final page is unknown (hence ??) --- confirm against the
%%% publisher's record.
@Article{Silla:2017:BRG,
  author =       "Federico Silla and Sergio Iserte and Carlos Rea{\~n}o
                 and Javier Prades",
  title =        "On the benefits of the remote {GPU} virtualization
                 mechanism: The {rCUDA} case",
  journal =      j-CCPE,
  volume =       "29",
  number =       "13",
  pages =        "e4072:1--e4072:??",
  day =          "10",
  month =        jul,
  year =         "2017",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4072",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Jul 24 08:22:38 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  acknowledgement = ack-nhfb,
  articleno =    "e4072",
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Singh:2017:EER,
  author =       "Amit Kumar Singh and Alok Prakash and Karunakar Reddy
                 Basireddy and Geoff V. Merrett and Bashir M.
                 Al-Hashimi",
  title =        "Energy-Efficient Run-Time Mapping and Thread
                 Partitioning of Concurrent {OpenCL} Applications on
                 {CPU--GPU MPSoCs}",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "147:1--147:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126548",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Heterogeneous Multi-Processor Systems-on-Chips
                 (MPSoCs) containing CPU and GPU cores are typically
                 required to execute applications concurrently. However,
                 as will be shown in this paper, existing approaches are
                 not well suited for concurrent applications as they are
                 developed either by considering only a single
                 application or they do not exploit both CPU and GPU
                 cores at the same time. In this paper, we propose an
                 energy-efficient run-time mapping and thread
                 partitioning approach for executing concurrent OpenCL
                 applications on both CPU and GPU cores while satisfying
                 performance requirements. Depending upon the
                 performance requirements, for each concurrently
                 executing application, the mapping process finds the
                 appropriate number of CPU cores and operating
                 frequencies of CPU and GPU cores, and the partitioning
                 process identifies an efficient partitioning of the
                 applications' threads between CPU and GPU cores. We
                 validate the proposed approach experimentally on the
                 Odroid-XU3 hardware platform with various mixes of
                 applications from the Polybench benchmark suite.
                 Additionally, a case-study is performed with a
                 real-world application SLAMBench. Results show an
                 average energy saving of 32\% compared to existing
                 approaches while still satisfying the performance
                 requirements.",
  acknowledgement = ack-nhfb,
  articleno =    "147",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J840",
}
@Article{Sotomayor:2017:ACG,
  author =       "Rafael Sotomayor and Luis Miguel Sanchez and Javier
                 Garcia Blas and Javier Fernandez and J. Daniel Garcia",
  title =        "Automatic {CPU\slash GPU} Generation of
                 Multi-versioned {OpenCL} Kernels for {C++} Scientific
                 Applications",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "45",
  number =       "2",
  pages =        "262--282",
  month =        apr,
  year =         "2017",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-016-0425-6",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Mon Mar 13 15:25:22 MDT 2017",
  bibsource =    "http://link.springer.com/journal/10766/45/2;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/article/10.1007/s10766-016-0425-6",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Steele:2017:UBP,
  author =       "Guy L. {Steele, Jr.} and Jean-Baptiste Tristan",
  title =        "Using Butterfly-Patterned Partial Sums to Draw from
                 Discrete Distributions",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "8",
  pages =        "341--355",
  month =        aug,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3155284.3018757",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Dec 1 18:56:12 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "We describe a SIMD technique for drawing values from
                 multiple discrete distributions, such as sampling from
                 the random variables of a mixture model, that avoids
                 computing a complete table of partial sums of the
                 relative probabilities. A table of alternate
                 (``butterfly-patterned'') form is faster to compute,
                 making better use of coalesced memory accesses; from
                 this table, complete partial sums are computed on the
                 fly during a binary search. Measurements using CUDA 7.5
                 on an NVIDIA Titan Black GPU show that this technique
                 makes an entire machine-learning application that uses
                 a Latent Dirichlet Allocation topic model with 1024
                 topics about 13\% faster (when using
                 single-precision floating-point data) or about 35\%
                 faster (when using double-precision floating-point
                 data) than doing a straightforward matrix transposition
                 after using coalesced accesses.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '17 conference proceedings.",
}
%%% NOTE: Szo:2017:PET and Szoke:2017:PET are two records of the same
%%% article (identical DOI).  Both keys are kept so that existing
%%% citations of either one still resolve; the final page number here
%%% agrees with the Szoke:2017:PET record.
@Article{Szo:2017:PET,
  author =       "M{\'a}t{\'e} Sz{\H{o}}ke and Tam{\'a}s Istv{\'a}n
                 J{\'o}zsa and {\'A}d{\'a}m Kolesz{\'a}r and Irene
                 Moulitsas and L{\'a}szl{\'o} K{\"o}n{\"o}zsy",
  title =        "Performance Evaluation of a Two-Dimensional Lattice
                 {Boltzmann} Solver Using {CUDA} and {PGAS} {UPC} Based
                 Parallelisation",
  journal =      j-TOMS,
  volume =       "44",
  number =       "1",
  pages =        "8:1--8:22",
  month =        jul,
  year =         "2017",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/3085590",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Wed Oct 4 10:55:07 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3085590",
  abstract =     "The Unified Parallel C (UPC) language from the
                 Partitioned Global Address Space (PGAS) family unifies
                 the advantages of shared and local memory spaces and
                 offers a relatively straightforward code
                 parallelisation with the Central Processing Unit (CPU).
                 In contrast, the Computer Unified Device Architecture
                 (CUDA) development kit gives a tool to make use of the
                 Graphics Processing Unit (GPU). We provide a detailed
                 comparison between these novel techniques through the
                 parallelisation of a two-dimensional lattice Boltzmann
                 method based fluid flow solver. Our comparison between
                 the CUDA and UPC parallelisation takes into account the
                 required conceptual effort, the performance gain, and
                 the limitations of the approaches from the application
                 oriented developers' point of view. We demonstrated
                 that UPC led to competitive efficiency with the local
                 memory implementation. However, the performance of the
                 shared memory code fell behind our expectations, and we
                 concluded that the investigated UPC compilers could not
                 efficiently treat the shared memory space. The CUDA
                 implementation proved to be more complex compared to
                 the UPC approach mainly because of the complicated
                 memory structure of the graphics card which also makes
                 GPUs suitable for the parallelisation of the lattice
                 Boltzmann method.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
%%% NOTE: Szoke:2017:PET and Szo:2017:PET are two records of the same
%%% article (identical DOI).  Both keys are kept so that existing
%%% citations of either one still resolve.
@Article{Szoke:2017:PET,
  author =       "M{\'a}t{\'e} Sz{\H{o}}ke and Tam{\'a}s Istv{\'a}n
                 J{\'o}zsa and {\'A}d{\'a}m Kolesz{\'a}r and Irene
                 Moulitsas and L{\'a}szl{\'o} K{\"o}n{\"o}zsy",
  title =        "Performance Evaluation of a Two-Dimensional Lattice
                 {Boltzmann} Solver Using {CUDA} and {PGAS UPC} Based
                 Parallelisation",
  journal =      j-TOMS,
  volume =       "44",
  number =       "1",
  pages =        "8:1--8:22",
  month =        jul,
  year =         "2017",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/3085590",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Fri Jul 14 16:39:28 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  abstract =     "The Unified Parallel C (UPC) language from the
                 Partitioned Global Address Space (PGAS) family unifies
                 the advantages of shared and local memory spaces and
                 offers a relatively straightforward code
                 parallelisation with the Central Processing Unit (CPU).
                 In contrast, the Computer Unified Device Architecture
                 (CUDA) development kit gives a tool to make use of the
                 Graphics Processing Unit (GPU). We provide a detailed
                 comparison between these novel techniques through the
                 parallelisation of a two-dimensional lattice Boltzmann
                 method based fluid flow solver. Our comparison between
                 the CUDA and UPC parallelisation takes into account the
                 required conceptual effort, the performance gain, and
                 the limitations of the approaches from the application
                 oriented developers' point of view. We demonstrated that
                 UPC led to competitive efficiency with the local memory
                 implementation. However, the performance of the shared
                 memory code fell behind our expectations, and we
                 concluded that the investigated UPC compilers could not
                 efficiently treat the shared memory space. The CUDA
                 implementation proved to be more complex compared to
                 the UPC approach mainly because of the complicated
                 memory structure of the graphics card which also makes
                 GPUs suitable for the parallelisation of the lattice
                 Boltzmann method.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
%%% NOTE(review): the article number e4022 is taken from the DOI suffix;
%%% the final page is unknown (hence ??) --- confirm against the
%%% publisher's record.
@Article{Takafuji:2017:CCC,
  author =       "Daisuke Takafuji and Koji Nakano and Yasuaki Ito and
                 Jacir Bordim",
  title =        "{C2CU}: a {CUDA--C} program generator for bulk
                 execution of a sequential algorithm",
  journal =      j-CCPE,
  volume =       "29",
  number =       "17",
  pages =        "e4022:1--e4022:??",
  day =          "10",
  month =        sep,
  year =         "2017",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4022",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Mon Sep 4 17:02:00 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  articleno =    "e4022",
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Taylor:2017:AOO,
  author =       "Ben Taylor and Vicent Sanz Marco and Zheng Wang",
  title =        "Adaptive optimization for {OpenCL} programs on
                 embedded heterogeneous systems",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "4",
  pages =        "11--20",
  month =        may,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3140582.3081040",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:15 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Heterogeneous multi-core architectures consisting of
                 CPUs and GPUs are commonplace in today's embedded
                 systems. These architectures offer potential for energy
                 efficient computing if the application task is mapped
                 to the right core. Realizing such potential is
                 challenging due to the complex and evolving nature of
                 hardware and applications. This paper presents an
                 automatic approach to map OpenCL kernels onto
                 heterogeneous multi-cores for a given optimization
                 criterion --- whether it is faster runtime, lower
                 energy consumption or a trade-off between them. This is
                 achieved by developing a machine learning based
                 approach to predict which processor to use to run the
                 OpenCL kernel and the host program, and at what
                 frequency the processor should operate. Instead of
                 hand-tuning a model for each optimization metric, we
                 use machine learning to develop a unified framework
                 that first automatically learns the optimization
                 heuristic for each metric off-line, then uses the
                 learned knowledge to schedule OpenCL kernels at runtime
                 based on code and runtime information of the program.
                 We apply our approach to a set of representative OpenCL
                 benchmarks and evaluate it on an ARM big.LITTLE mobile
                 platform. Our approach achieves over 93\% of the
                 performance delivered by a perfect predictor. We obtain,
                 on average, 1.2x, 1.6x, and 1.8x improvement
                 respectively for runtime, energy consumption and the
                 energy delay product when compared to a comparative
                 heterogeneous-aware OpenCL task mapping scheme.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "LCTES '17 conference proceedings.",
}
@Article{Utterback:2017:POR,
  author =       "Robert Utterback and Kunal Agrawal and I-Ting Angelina
                 Lee and Milind Kulkarni",
  title =        "Processor-Oblivious Record and Replay",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "8",
  pages =        "145--161",
  month =        aug,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3155284.3018764",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Fri Dec 1 18:56:12 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Record-and-replay systems are useful tools for
                 debugging non-deterministic parallel programs by first
                 recording an execution and then replaying that
                 execution to produce the same access pattern. Existing
                 record-and-replay systems generally target thread-based
                 execution models, and record the behaviors and
                 interleavings of individual threads. Dynamic
                 multithreaded languages and libraries, such as the Cilk
                 family, OpenMP, TBB, etc., do not have a notion of
                 threads. Instead, these languages provide a
                 processor-oblivious model of programming, where
                 programs expose task-parallelism using high-level
                 constructs such as spawn/sync without regard to the
                 number of threads/cores available to run the program.
                 Thread-based record-and-replay would violate the
                 processor-oblivious nature of these programs, as they
                 incorporate the number of threads into the recorded
                 information, constraining the replayed execution to the
                 same number of threads. In this paper, we present a
                 processor-oblivious record-and-replay scheme for such
                 languages where record and replay can use different
                 number of processors and both are scheduled using work
                 stealing. We provide theoretical guarantees for our
                 record and replay scheme --- namely that record is
                 optimal for programs with one lock and replay is
                 near-optimal for all cases. In addition, we implemented
                 this scheme in the Cilk Plus runtime system and our
                 evaluation indicates that processor-obliviousness does
                 not cause substantial overheads.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '17 conference proceedings.",
}
@Book{vanderPas:2017:UON,
  author =       "Ruud van der Pas",
  title =        "Using {OpenMP} --- the next step: affinity,
                 accelerators, tasking, and {SIMD}",
  publisher =    pub-MIT,
  address =      pub-MIT:adr,
  pages =        "xxi + 365",
  year =         "2017",
  ISBN =         "0-262-53478-9 (paperback)",
  ISBN-13 =      "978-0-262-53478-9 (paperback)",
  LCCN =         "QA76.642 .P427 2017",
  bibdate =      "Sat Oct 5 07:54:47 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 z3950.loc.gov:7090/Voyager",
  series =       "Scientific and engineering computation",
  abstract =     "This book offers an up-to-date, practical tutorial on
                 advanced features in the widely used OpenMP parallel
                 programming model. Building on the previous volume,
                 Using OpenMP: Portable Shared Memory Parallel
                 Programming (MIT Press), this book goes beyond the
                 fundamentals to focus on what has been changed and
                 added to OpenMP since the 2.5 specifications. It
                 emphasizes four major and advanced areas: thread
                 affinity (keeping threads close to their data),
                 accelerators (special hardware to speed up certain
                 operations), tasking (to parallelize algorithms with a
                 less regular execution flow), and SIMD (hardware
                 assisted operations on vectors). As in the earlier
                 volume, the focus is on practical usage, with major new
                 features primarily introduced by example. Examples are
                 restricted to C and C++, but are straightforward enough
                 to be understood by Fortran programmers. After a brief
                 recap of OpenMP 2.5, the book reviews enhancements
                 introduced since 2.5. It then discusses in detail
                 tasking, a major functionality enhancement; Non-Uniform
                 Memory Access (NUMA) architectures, supported by
                 OpenMP; SIMD, or Single Instruction Multiple Data;
                 heterogeneous systems, a new parallel programming model
                 to offload computation to accelerators; and the
                 expected further development of OpenMP.",
  acknowledgement = ack-nhfb,
  subject =      "Parallel programming (Computer science); Application
                 program interfaces (Computer software); OpenMP
                 (Application program interface)",
  tableofcontents = "Intro \\
                 Contents \\
                 Series Foreword \\
                 Foreword \\
                 Preface \\
                 1 A Recap of OpenMP 2.5 \\
                 1.1 OpenMP Directives and Syntax \\
                 1.2 Creating a Parallel Program with OpenMP \\
                 1.2.1 The Parallel Region \\
                 1.2.2 The OpenMP Execution Model \\
                 1.2.3 The OpenMP Memory Model \\
                 1.3 The Worksharing Constructs \\
                 1.3.1 The Loop Construct \\
                 1.3.2 The Sections Construct \\
                 1.3.3 The Single Construct \\
                 1.3.4 The Fortran Workshare Construct \\
                 1.3.5 The Combined Worksharing Constructs \\
                 1.4 The Master Construct \\
                 1.5 Nested Parallelism \\
                 1.6 Synchronization Constructs \\
                 1.6.1 The Barrier Construct \\
                 1.6.2 The Critical Construct \\
                 1.6.3 The Atomic Construct \\
                 1.6.4 The Ordered Construct \\
                 1.7 The OpenMP 2.5 Environment Variables \\
                 1.8 The OpenMP 2.5 Runtime Functions \\
                 1.9 Internal Control Variables in OpenMP \\
                 1.10 Concluding Remarks \\
                 2 New Features in OpenMP \\
                 2.1 Enhancements to Existing Constructs \\
                 2.1.1 The Schedule Clause \\
                 2.1.2 The If Clause \\
                 2.1.3 The Collapse Clause \\
                 2.1.4 The Linear Clause \\
                 2.1.5 The Critical Construct \\
                 2.1.6 The Atomic Construct \\
                 2.2 New Environment Variables \\
                 2.3 New Runtime Functions \\
                 2.3.1 Runtime Functions for Thread Management, Thread
                 Scheduling, and Nested Parallelism \\
                 2.3.2 Runtime Functions for Tasking, Cancellation, and
                 Thread Affinity \\
                 2.3.3 Runtime Functions for Locking \\
                 2.3.4 Runtime Functions for Heterogeneous Systems \\
                 2.3.5 Usage Examples of the New Runtime Functions \\
                 2.4 New Functionality \\
                 2.4.1 Changed Ownership of Locks \\
                 2.4.2 Cancellation \\
                 2.4.3 User-Defined Reduction \\
                 2.4.4 The Doacross Loop \\
                 2.5 Concluding Remarks \\
                 3 Tasking \\
                 3.1 Hello Task \\
                 3.1.1 Parallelizing a Palindrome \\
                 3.1.2 Parallelizing a Sentence with a Palindrome \\
                 3.1.3 Closing Comments on the Palindrome Example \\
                 3.2 Using Tasks to Parallelize a Linked List \\
                 3.2.1 The Sequential Version of the Linked List Program
                 \\
                 3.2.2 The Parallel Version of the Linked List Program
                 \\
                 3.2.3 Closing Comments on the Linked List Example \\
                 3.3 Sorting Things Out with Tasks \\
                 3.3.1 The Sequential Quicksort Algorithm \\
                 3.3.2 The OpenMP Quicksort Algorithm \\
                 3.3.3 Fine-Tuning the OpenMP Quicksort Algorithm \\
                 3.3.4 Closing Comments on the OpenMP Quicksort
                 Algorithm \\
                 3.4 Overlapping I/O and Computations Using Tasks \\
                 3.4.1 Using Tasks and Task Dependences \\
                 3.4.2 Using the Taskloop Construct \\
                 3.4.3 Closing Comments on the Pipeline Example \\
                 3.5 The Data Environment with Tasks \\
                 3.6 What is a Task? \\
                 3.7 Task Creation, Synchronization, and Scheduling \\
                 3.8 The Taskloop Construct \\
                 3.9 Concluding Remarks \\
                 4 Thread Affinity \\
                 4.1 The Characteristics of a cc-NUMA Architecture \\
                 4.2 First Touch Data Placement \\
                 4.2.1 The Pros and Cons of First Touch Data Placement
                 \\
                 4.2.2 How to Exploit the First Touch Policy \\
                 4.3 The Need for Thread Affinity Support \\
                 4.4 The OpenMP Thread Affinity Philosophy \\
                 4.5 The OpenMP Places Concept \\
                 4.5.1 Defining OpenMP Places Using Sets with Numbers
                 \\
                 4.5.2 The OpenMP Place List \\
                 4.5.3 Defining OpenMP Places Using Abstract Names",
}
@Article{Vargas-Perez:2017:HMO,
  author =       "Sandino Vargas-Perez and Fahad Saeed",
  title =        "A Hybrid {MPI--OpenMP} Strategy to Speedup the
                 Compression of Big Next-Generation Sequencing
                 Datasets",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "28",
  number =       "10",
  pages =        "2760--2769",
  month =        oct,
  year =         "2017",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2017.2692782",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Oct 12 06:58:12 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.computer.org/csdl/trans/td/2017/10/07895161-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Waidyasooriya:2017:OBF,
  author =       "Hasitha Muthumala Waidyasooriya and Yasuhiro Takei and
                 Shunsuke Tatsumi and Masanori Hariyama",
  title =        "{OpenCL}-Based {FPGA}-Platform for Stencil Computation
                 and Its Optimization Methodology",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "28",
  number =       "5",
  pages =        "1390--1402",
  month =        may,
  year =         "2017",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2016.2614981",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Jun 15 05:46:51 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.computer.org/csdl/trans/td/2017/05/07582502-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Wang:2017:CEG,
  author =       "Siqi Wang and Guanwen Zhong and Tulika Mitra",
  title =        "{CGPredict}: Embedded {GPU} Performance Estimation
                 from Single-Threaded Applications",
  journal =      j-TECS,
  volume =       "16",
  number =       "5s",
  pages =        "146:1--146:??",
  month =        oct,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3126546",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:33 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib",
  abstract =     "Heterogeneous multiprocessor system-on-chip
                 architectures are endowed with accelerators such as
                 embedded GPUs and FPGAs capable of general-purpose
                 computation. The application developers for such
                 platforms need to carefully choose the accelerator with
                 the maximum performance benefit. For a given
                 application, usually, the reference code is specified
                 in a high-level single-threaded programming language
                 such as C. The performance of an application kernel on
                 an accelerator is a complex interplay among the exposed
                 parallelism, the compiler, and the accelerator
                 architecture. Thus, determining the performance of a
                 kernel requires its redevelopment into each
                 accelerator-specific language, causing substantial
                 wastage of time and effort. To aid the developer in
                 this early design decision, we present an analytical
                 framework CGPredict to predict the performance of a
                 computational kernel on an embedded GPU architecture
                 from un-optimized, single-threaded C code. The
                 analytical approach provides insights on application
                 characteristics which suggest further
                 application-specific optimizations. The estimation
                 error is as low as 2.66\% (average 9\%) compared to the
                 performance of the same kernel written in native CUDA
                 code running on NVIDIA Kepler embedded GPU. This low
                 performance estimation error enables CGPredict to
                 provide an early design recommendation of the
                 accelerator starting from C code.",
  acknowledgement = ack-nhfb,
  articleno =    "146",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J840",
}
@Article{Weber:2017:MAL,
  author =       "Nicolas Weber and Michael Goesele",
  title =        "{MATOG}: Array Layout Auto-Tuning for {CUDA}",
  journal =      j-TACO,
  volume =       "14",
  number =       "3",
  pages =        "28:1--28:??",
  month =        sep,
  year =         "2017",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3106341",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Wed Sep 6 17:12:05 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Optimal code performance is (besides correctness and
                 accuracy) the most important objective in compute
                 intensive applications. In many of these applications,
                 Graphic Processing Units (GPUs) are used because of
                 their high amount of compute power. However, caused by
                 their massively parallel architecture, the code has to
                 be specifically adjusted to the underlying hardware to
                 achieve optimal performance and therefore has to be
                 reoptimized for each new generation. In reality, this
                 is usually not the case as productive code is normally
                 at least several years old and nobody has the time to
                 continuously adjust existing code to new hardware. In
                 recent years more and more approaches have emerged that
                 automatically tune the performance of applications
                 toward the underlying hardware. In this article, we
                 present the MATOG auto-tuner and its concepts. It
                 abstracts the array memory access in CUDA applications
                 and automatically optimizes the code according to the
                 used GPUs. MATOG only requires few profiling runs to
                 analyze even complex applications, while achieving
                 significant speedups over non-optimized code,
                 independent of the used GPU generation and without the
                 need to manually tune the code.",
  acknowledgement = ack-nhfb,
  articleno =    "28",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Wickerson:2017:ACM,
  author =       "John Wickerson and Mark Batty and Tyler Sorensen and
                 George A. Constantinides",
  title =        "Automatically comparing memory consistency models",
  journal =      j-SIGPLAN,
  volume =       "52",
  number =       "1",
  pages =        "190--204",
  month =        jan,
  year =         "2017",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3093333.3009838",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Sat Sep 16 10:18:14 MDT 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "A memory consistency model (MCM) is the part of a
                 programming language or computer architecture
                 specification that defines which values can legally be
                 read from shared memory locations. Because MCMs take
                 into account various optimisations employed by
                 architectures and compilers, they are often complex and
                 counterintuitive, which makes them challenging to
                 design and to understand. We identify four tasks
                 involved in designing and understanding MCMs:
                 generating conformance tests, distinguishing two MCMs,
                 checking compiler optimisations, and checking compiler
                 mappings. We show that all four tasks are instances of
                 a general constraint-satisfaction problem to which the
                 solution is either a program or a pair of programs.
                 Although this problem is intractable for automatic
                 solvers when phrased over programs directly, we show
                 how to solve analogous constraints over program
                 executions, and then construct programs that satisfy
                 the original constraints. Our technique, which is
                 implemented in the Alloy modelling framework, is
                 illustrated on several software- and architecture-level
                 MCMs, both axiomatically and operationally defined. We
                 automatically recreate several known results, often in
                 a simpler form, including: distinctions between
                 variants of the C11 MCM; a failure of the `SC-DRF
                 guarantee' in an early C11 draft; that x86 is
                 `multi-copy atomic' and Power is not; bugs in common
                 C11 compiler optimisations; and bugs in a compiler
                 mapping from OpenCL to AMD-style GPUs. We also use our
                 technique to develop and validate a new MCM for NVIDIA
                 GPUs that supports a natural mapping from OpenCL.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "POPL '17 conference proceedings.",
}
@Article{Winkler:2017:GSM,
  author =       "Daniel Winkler and Michael Meister and Massoud
                 Rezavand and Wolfgang Rauch",
  title =        "{gpuSPHASE} --- A shared memory caching implementation
                 for {$2$D} {SPH} using {CUDA}",
  journal =      j-COMP-PHYS-COMM,
  volume =       "213",
  number =       "??",
  pages =        "165--180",
  month =        apr,
  year =         "2017",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Sat Feb 4 08:00:23 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465516303666",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655/",
}
@Article{Yam-Uicab:2017:FHT,
  author =       "R. Yam-Uicab and J. L. Lopez-Martinez and J. A.
                 Trejo-Sanchez and H. Hidalgo-Silva and S.
                 Gonzalez-Segura",
  title =        "A fast {Hough} Transform algorithm for straight lines
                 detection in an image using {GPU} parallel computing
                 with {CUDA-C}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "73",
  number =       "11",
  pages =        "4823--4842",
  month =        nov,
  year =         "2017",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-017-2051-5",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jan 6 08:59:18 MST 2018",
  bibsource =    "http://link.springer.com/journal/11227/73/11;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{YarKhan:2017:PPN,
  author          = {Asim YarKhan and Jakub Kurzak and Piotr Luszczek and
                     Jack Dongarra},
  title           = {Porting the {PLASMA} Numerical Library to the {OpenMP}
                     Standard},
  journal         = j-INT-J-PARALLEL-PROG,
  volume          = {45},
  number          = {3},
  pages           = {612--633},
  month           = jun,
  year            = {2017},
  CODEN           = {IJPPE5},
  DOI             = {https://doi.org/10.1007/s10766-016-0441-6},
  ISSN            = {0885-7458 (print), 1573-7640 (electronic)},
  ISSN-L          = {0885-7458},
  bibdate         = {Sat Jun 24 11:37:59 MDT 2017},
  bibsource       = {http://link.springer.com/journal/10766/45/3;
                     http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of Parallel Programming},
  journal-URL     = {http://link.springer.com/journal/10766},
}
@Article{Yeh:2017:PFG,
  author          = {Tsung Tai Yeh and Amit Sabne and Putt Sakdhnagool and
                     Rudolf Eigenmann and Timothy G. Rogers},
  title           = {{Pagoda}: Fine-Grained {GPU} Resource Virtualization
                     for Narrow Tasks},
  journal         = j-SIGPLAN,
  volume          = {52},
  number          = {8},
  pages           = {221--234},
  month           = aug,
  year            = {2017},
  CODEN           = {SINODQ},
  DOI             = {https://doi.org/10.1145/3155284.3018754},
  ISSN            = {0362-1340 (print), 1523-2867 (print), 1558-1160
                     (electronic)},
  ISSN-L          = {0362-1340},
  bibdate         = {Fri Dec 1 18:56:12 MST 2017},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract        = {Massively multithreaded GPUs achieve high throughput
                     by running thousands of threads in parallel. To fully
                     utilize the hardware, workloads spawn work to the GPU
                     in bulk by launching large tasks, where each task is a
                     kernel that contains thousands of threads that occupy
                     the entire GPU. GPUs face severe underutilization and
                     their performance benefits vanish if the tasks are
                     narrow, i.e., they contain {$<$} 500 threads.
                     Latency-sensitive applications in network, signal, and
                     image processing that generate a large number of tasks
                     with relatively small inputs are examples of such
                     limited parallelism. This paper presents Pagoda, a
                     runtime system that virtualizes GPU resources, using an
                     OS-like daemon kernel called MasterKernel. Tasks are
                     spawned from the CPU onto Pagoda as they become
                     available, and are scheduled by the MasterKernel at the
                     warp granularity. Experimental results demonstrate that
                     Pagoda achieves a geometric mean speedup of 5.70x over
                     PThreads running on a 20-core CPU, 1.51x over
                     CUDA-HyperQ, and 1.69x over GeMTC, the
                     state-of-the-art runtime GPU task scheduling system.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGPLAN Notices},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark          = {PPoPP '17 conference proceedings.},
}
@Article{Young-S:2017:OGI,
  author          = {Luis E. Young-S. and Paulsamy Muruganandam and Sadhan
                     K. Adhikari and Vladimir Loncar and Dusan
                     Vudragovi{\'c} and Antun Balaz},
  title           = {{OpenMP} {GNU} and {Intel} {Fortran} programs for
                     solving the time-dependent {Gross--Pitaevskii}
                     equation},
  journal         = j-COMP-PHYS-COMM,
  volume          = {220},
  number          = {??},
  pages           = {503--506},
  month           = nov,
  year            = {2017},
  CODEN           = {CPHCBZ},
  ISSN            = {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L          = {0010-4655},
  bibdate         = {Fri Sep 15 11:56:42 MDT 2017},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                     http://www.math.utah.edu/pub/tex/bib/gnu.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0010465517302321},
  acknowledgement = ack-nhfb,
  fjournal        = {Computer Physics Communications},
  journal-URL     = {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Zha:2017:IFM,
  author          = {Yue Zha and Jing Li},
  title           = {{IMEC}: A Fully Morphable In-Memory Computing Fabric
                     Enabled by Resistive Crossbar},
  journal         = j-IEEE-COMPUT-ARCHIT-LETT,
  volume          = {16},
  number          = {2},
  pages           = {123--126},
  month           = jul # "\slash " # dec,
  year            = {2017},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1109/LCA.2017.2672558},
  ISSN            = {1556-6056 (print), 1556-6064 (electronic)},
  ISSN-L          = {1556-6056},
  bibdate         = {Thu Jun 20 17:01:23 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/ieeecomputarchitlett.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  abstract        = {In this paper, we propose a fully morphable In-MEmory
                     Computing (IMEC) fabric to better implement the concept
                     of processing inside memory (PIM). Enabled by emerging
                     nonvolatile memory, i.e., RRAM and its monolithic 3D
                     integration, IMEC can be configured into one or a
                     combination of four distinct functions, (1) logic, (2)
                     ternary content addressable memory, (3) memory, and (4)
                     interconnect. Thus, IMEC exploits a continuum of PIM
                     capabilities across the whole spectrum, ranging from 0
                     percent (pure data storage) to 100 percent (pure
                     compute engine), or intermediate states in between.
                     IMEC can be modularly integrated into the DDRx memory
                     subsystem, communicating with processors by the
                     ordinary DRAM commands. Additionally, to reduce the
                     programming burden, we provide a complete framework to
                     compile applications written in high-level programming
                     language (e.g., OpenCL) onto IMEC. This framework also
                     enables code portability across different platforms for
                     heterogeneous computing. By using this framework,
                     several benchmarks are mapped onto IMEC for evaluating
                     its performance, energy and resource utilization. The
                     simulation results show that, IMEC reduces the energy
                     consumption by 99.6 percent, and achieves 644x speedup,
                     compared to a baseline CPU system. We further compare
                     IMEC with FPGA architecture, and demonstrate that the
                     performance improvement is not simply obtained by
                     replacing SRAM cells with denser RRAM cells.},
  acknowledgement = ack-nhfb,
  affiliation     = {Zha, Y (Reprint Author), Univ Wisconsin, Elect \& Comp
                     Engn Dept, Madison, WI 53706 USA. Zha, Yue; Li, Jing,
                     Univ Wisconsin, Elect \& Comp Engn Dept, Madison, WI
                     53706 USA.},
  author-email    = {yzha3@wisc.edu jli587@wisc.edu},
  da              = {2019-06-20},
  doc-delivery-number = {FR2AX},
  eissn           = {1556-6064},
  fjournal        = {IEEE Computer Architecture Letters},
  journal-iso     = {IEEE Comput. Archit. Lett.},
  journal-URL     = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=10208},
  keywords        = {energy-efficiency computing; Non-volatile memory;
                     processing-in-memory; TCAM},
  keywords-plus   = {ARCHITECTURE},
  number-of-cited-references = {20},
  research-areas  = {Computer Science},
  times-cited     = {1},
  unique-id       = {Zha:2017:IFM},
  web-of-science-categories = {Computer Science, Hardware \&
                     Architecture},
}
@Article{Zhang:2017:DLN,
  author          = {Jie Zhang and Xiaoyi Lu and Dhabaleswar K. Panda},
  title           = {Designing Locality and {NUMA} Aware {MPI} Runtime for
                     Nested Virtualization based {HPC} Cloud with {SR--IOV}
                     Enabled {InfiniBand}},
  journal         = j-SIGPLAN,
  volume          = {52},
  number          = {7},
  pages           = {187--200},
  month           = jul,
  year            = {2017},
  CODEN           = {SINODQ},
  DOI             = {https://doi.org/10.1145/3140607.3050765},
  ISSN            = {0362-1340 (print), 1523-2867 (print), 1558-1160
                     (electronic)},
  ISSN-L          = {0362-1340},
  bibdate         = {Sat Sep 16 10:18:17 MDT 2017},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract        = {Hypervisor-based virtualization solutions reveal good
                     security and isolation, while container-based solutions
                     make applications and workloads more portable and
                     distributed in an effective, standardized and
                     repeatable way. Therefore, nested virtualization based
                     computing environments (e.g., container over virtual
                     machine), which inherit the capabilities from both
                     solutions, are becoming more and more attractive in
                     clouds (e.g., running Docker over Amazon EC2 VMs).
                     Recent studies have shown that running applications in
                     either VMs or containers still has significant
                     overhead, especially for I/O intensive workloads. This
                     motivates us to investigate whether the nested
                     virtualization based solution can be adopted to build
                     high-performance computing (HPC) clouds for running MPI
                     applications efficiently and where the bottlenecks lie.
                     To eliminate performance bottlenecks, we propose a
                     high-performance two-layer locality and NUMA aware MPI
                     library, which is able to dynamically detect
                     co-resident containers inside one VM as well as detect
                     co-resident VM inside one host at MPI runtime. Thus the
                     MPI processes across different containers and VMs can
                     communicate to each other by shared memory or Cross
                     Memory Attach (CMA) channels instead of network channel
                     if they are co-resident. We further propose an enhanced
                     NUMA aware hybrid design to utilize InfiniBand loopback
                     based channel to optimize large message transfer across
                     containers when they are running on different sockets.
                     Performance evaluations show that compared with the
                     performance of the state-of-art (1Layer) design, our
                     proposed enhance-hybrid design can bring up to 184\%,
                     81\% and 12\% benefit on point-to-point, collective
                     operations, and end applications. Compared with the
                     default performance, our enhanced-hybrid design
                     delivers up to 184\%, 85\% and 16\% performance
                     improvement.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGPLAN Notices},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark          = {VEE '17 conference proceedings.},
}
@Article{Zhu:2017:OAP,
  author          = {Huming Zhu and Yanfei Wu and Pei Li and Peng Zhang and
                     Zhe Ji and Maoguo Gong},
  title           = {An {OpenCL}-accelerated parallel immunodominance clone
                     selection algorithm for feature selection},
  journal         = j-CCPE,
  volume          = {29},
  number          = {9},
  pages           = {e3838:1--e3838:??},
  day             = {10},
  month           = may,
  year            = {2017},
  CODEN           = {CCPEBO},
  DOI             = {https://doi.org/10.1002/cpe.3838},
  ISSN            = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L          = {1532-0626},
  bibdate         = {Mon Jul 24 08:22:36 MDT 2017},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Practice and Experience},
  journal-URL     = {http://www.interscience.wiley.com/jpages/1532-0626},
}
@Article{Zouaoui:2017:CNG,
  author          = {Chakib Mustapha Anouar Zouaoui and Nasreddine Taleb},
  title           = {{CL\_ARRAY}: a new generic library of multidimensional
                     containers for {C++} compilers with extension for
                     {OpenCL} framework},
  journal         = j-COMP-LANGS-SYS-STRUCT,
  volume          = {50},
  number          = {??},
  pages           = {53--81},
  month           = dec,
  year            = {2017},
  CODEN           = {????},
  ISSN            = {1477-8424 (print), 1873-6866 (electronic)},
  ISSN-L          = {1477-8424},
  bibdate         = {Fri Sep 15 11:36:13 MDT 2017},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/complngs.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S147784241630135X},
  acknowledgement = ack-nhfb,
  fjournal        = {Computer Languages, Systems and Structures},
  journal-URL     = {http://www.sciencedirect.com/science/journal/14778424/},
}
@Article{AlKadi:2018:GPC,
  author          = {Muhammed {Al Kadi} and Benedikt Janssen and Jones Yudi
                     and Michael Huebner},
  title           = {General-Purpose Computing with Soft {GPUs} on
                     {FPGAs}},
  journal         = j-TRETS,
  volume          = {11},
  number          = {1},
  pages           = {5:1--5:??},
  month           = mar,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3173548},
  ISSN            = {1936-7406 (print), 1936-7414 (electronic)},
  ISSN-L          = {1936-7406},
  bibdate         = {Sat Oct 19 17:42:59 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/python.bib;
                     http://www.math.utah.edu/pub/tex/bib/trets.bib},
  abstract        = {Using field-programmable gate arrays (FPGAs) as a
                     substrate to deploy soft graphics processing units
                     (GPUs) would enable offering the FPGA compute power in
                     a very flexible GPU-like tool flow.
                     Application-specific adaptations like selective
                     hardening of floating-point operations and instruction
                     set subsetting would mitigate the high area and power
                     demands of soft GPUs. This work explores the
                     capabilities and limitations of soft General Purpose
                     Computing on GPUs (GPGPU) for both fixed- and floating
                     point arithmetic. For this purpose, we have developed
                     FGPU: a configurable, scalable, and portable GPU
                     architecture designed especially for FPGAs. FGPU is
                     open-source and implemented entirely in RTL. It can be
                     programmed in OpenCL and controlled through a Python
                     API. This article introduces its hardware architecture
                     as well as its tool flow. We evaluated the proposed
                     GPGPU approach against multiple other solutions. In
                     comparison to homogeneous Multi-Processor
                     System-On-Chips (MPSoCs), we found that using a soft
                     GPU is a Pareto-optimal solution regarding throughput
                     per area and energy consumption. On average, FGPU has a
                     2.9$ \times $ better compute density and 11.2$ \times $
                     less energy consumption than a single MicroBlaze
                     processor when computing in IEEE-754 floating-point
                     format. An average speedup of about 4$ \times $ over
                     the ARM Cortex-A9 supported with the NEON vector
                     co-processor has been measured for fixed- or
                     floating-point benchmarks. In addition, the biggest
                     FGPU cores we could implement on a Xilinx Zynq-7000
                     System-On-Chip (SoC) can deliver similar performance to
                     equivalent implementations with High-Level Synthesis
                     (HLS).},
  acknowledgement = ack-nhfb,
  articleno       = {5},
  fjournal        = {ACM Transactions on Reconfigurable Technology and
                     Systems (TRETS)},
  journal-URL     = {http://portal.acm.org/toc.cfm?id=J1151},
}
@Article{Amer:2018:LCM,
  author          = {Abdelhalim Amer and Huiwei Lu and Pavan Balaji and
                     Milind Chabbi and Yanjie Wei and Jeff Hammond and
                     Satoshi Matsuoka},
  title           = {Lock Contention Management in Multithreaded {MPI}},
  journal         = j-TOPC,
  volume          = {5},
  number          = {3},
  pages           = {12:1--12:??},
  month           = jan,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3275443},
  ISSN            = {2329-4949 (print), 2329-4957 (electronic)},
  ISSN-L          = {2329-4949},
  bibdate         = {Wed Jan 23 16:12:26 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/topc.bib},
  URL             = {https://dl.acm.org/ft_gateway.cfm?id=3275443},
  abstract        = {In this article, we investigate contention management
                     in lock-based thread-safe MPI libraries. Specifically,
                     we make two assumptions: (1) locks are the only form of
                     synchronization when protecting communication paths;
                     and (2) contention occurs, and thus serialization is
                     unavoidable. Our work distinguishes between lock
                     acquisitions with respect to work being performed
                     inside a critical section; productive vs. unproductive.
                     Waiting for message reception without doing anything
                     else inside a critical section is an example of
                     unproductive lock acquisition. We show that the
                     high-throughput nature of modern scalable locking
                     protocols translates into better communication progress
                     for throughput-intensive MPI communication but
                     negatively impacts latency-sensitive communication
                     because of overzealous unproductive lock acquisition.
                     To reduce unproductive lock acquisitions, we devised a
                     method that promotes threads with productive work using
                     a generic two-level priority locking protocol. Our
                     results show that using a high-throughput protocol for
                     productive work and a fair protocol for less productive
                     code paths ensures the best tradeoff for fine-grained
                     communication, whereas a fair protocol is sufficient
                     for more coarse-grained communication. Although these
                     efforts have been rewarding, scalability degradation
                     remains significant. We discuss techniques that diverge
                     from the pure locking model and offer the potential to
                     further improve scalability.},
  acknowledgement = ack-nhfb,
  articleno       = {12},
  fjournal        = {ACM Transactions on Parallel Computing},
  journal-URL     = {http://dl.acm.org/citation.cfm?id=2632163},
}
@Article{Arif:2018:RBP,
  author          = {Mahwish Arif and Hans Vandierendonck},
  title           = {Reducing the burden of parallel loop schedulers for
                     many-core processors},
  journal         = j-SIGPLAN,
  volume          = {53},
  number          = {1},
  pages           = {383--384},
  month           = jan,
  year            = {2018},
  CODEN           = {SINODQ},
  DOI             = {https://doi.org/10.1145/3200691.3178517},
  ISSN            = {0362-1340 (print), 1523-2867 (print), 1558-1160
                     (electronic)},
  ISSN-L          = {0362-1340},
  bibdate         = {Wed Oct 16 14:12:56 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract        = {This work proposes a low-overhead half-barrier pattern
                     to schedule fine-grain parallel loops and considers its
                     integration in the Intel OpenMP and Cilkplus
                     schedulers. Experimental evaluation demonstrates that
                     the scheduling overhead of our techniques is 43\% lower
                     than Intel OpenMP and 12.1x lower than Cilk. We observe
                     22\% speedup on 48 threads, with a peak of 2.8x
                     speedup.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGPLAN Notices},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark          = {PPoPP '18 proceedings.},
}
@Article{Aydin:2018:RTP,
  author          = {Semra Aydin and Refik Samet and Omer Faruk Bay},
  title           = {Real-time parallel image processing applications on
                     multicore {CPUs} with {OpenMP} and {GPGPU} with
                     {CUDA}},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {74},
  number          = {6},
  pages           = {2255--2275},
  month           = jun,
  year            = {2018},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1007/s11227-017-2168-6},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Thu Oct 10 15:31:12 MDT 2019},
  bibsource       = {http://link.springer.com/journal/11227/74/6;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
}
@Article{Azimi:2018:SVS,
  author          = {Reza Azimi and Tyler Fox and Wendy Gonzalez and
                     Sherief Reda},
  title           = {Scale-Out vs Scale-Up: A Study of {ARM}-based {SoCs}
                     on Server-Class Workloads},
  journal         = j-TOMPECS,
  volume          = {3},
  number          = {4},
  pages           = {18:1--18:??},
  month           = sep,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3232162},
  ISSN            = {2376-3639 (print), 2376-3647 (electronic)},
  ISSN-L          = {2376-3639},
  bibdate         = {Sat Sep 21 07:21:16 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/tompecs.bib},
  URL             = {https://dl.acm.org/citation.cfm?id=3232162},
  abstract        = {ARM 64-bit processing has generated enthusiasm to
                     develop ARM-based servers that are targeted for both
                     data centers and supercomputers. In addition to the
                     server-class components and hardware advancements, the
                     ARM software environment has grown substantially over
                     the past decade. Major development ecosystems and
                     libraries have been ported and optimized to run on ARM,
                     making ARM suitable for server-class workloads. There
                     are two trends in available ARM SoCs: mobile-class ARM
                     SoCs that rely on the heterogeneous integration of a
                     mix of CPU cores, GPGPU streaming multiprocessors
                     (SMs), and other accelerators, and the server-class
                     SoCs that instead rely on integrating a larger number
                     of CPU cores with no GPGPU support and a number of IO
                     accelerators. For scaling the number of processing
                     cores, there are two different paradigms: mobile-class
                     SoCs that use scale-out architecture in the form of a
                     cluster of simpler systems connected over a network,
                     and server-class ARM SoCs that use the scale-up
                     solution and leverage symmetric multiprocessing to pack
                     a large number of cores on the chip. In this article,
                     we present ScaleSoC cluster, which is a scale-out
                     solution based on mobile class ARM SoCs. ScaleSoC
                     leverages fast network connectivity and GPGPU
                     acceleration to improve performance and energy
                     efficiency compared to previous ARM scale-out clusters.
                     We consider a wide range of modern server-class
                     parallel workloads to study both scaling paradigms,
                     including latency-sensitive transactional workloads,
                     MPI-based CPU and GPGPU-accelerated scientific
                     applications, and emerging artificial intelligence
                     workloads. We study the performance and energy
                     efficiency of ScaleSoC compared to server-class ARM
                     SoCs and discrete GPGPUs in depth. We quantify the
                     network overhead on the performance of ScaleSoC and
                     show that packing a large number of ARM cores on a
                     single chip does not necessarily guarantee better
                     performance, due to the fact that shared resources,
                     such as last-level cache, become performance
                     bottlenecks. We characterize the GPGPU accelerated
                     workloads and demonstrate that for applications that
                     can leverage the better CPU-GPGPU balance of the
                     ScaleSoC cluster, performance and energy efficiency
                     improve compared to discrete GPGPUs.},
  acknowledgement = ack-nhfb,
  articleno       = {18},
  fjournal        = {ACM Transactions on Modeling and Performance
                     Evaluation of Computing Systems (TOMPECS)},
  journal-URL     = {http://dl.acm.org/pub.cfm?id=J1525},
}
@Article{Bazow:2018:MPS,
  author          = {Dennis Bazow and Ulrich Heinz and Michael Strickland},
  title           = {Massively parallel simulations of relativistic fluid
                     dynamics on graphics processing units with {CUDA}},
  journal         = j-COMP-PHYS-COMM,
  volume          = {225},
  number          = {??},
  pages           = {92--113},
  month           = apr,
  year            = {2018},
  CODEN           = {CPHCBZ},
  DOI             = {https://doi.org/10.1016/j.cpc.2017.01.015},
  ISSN            = {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L          = {0010-4655},
  bibdate         = {Wed Feb 28 14:39:27 MST 2018},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0010465517300279},
  acknowledgement = ack-nhfb,
  fjournal        = {Computer Physics Communications},
  journal-URL     = {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Belviranli:2018:JDA,
  author          = {Mehmet E. Belviranli and Seyong Lee and Jeffrey S.
                     Vetter and Laxmi N. Bhuyan},
  title           = {{Juggler}: a dependence-aware task-based execution
                     framework for {GPUs}},
  journal         = j-SIGPLAN,
  volume          = {53},
  number          = {1},
  pages           = {54--67},
  month           = jan,
  year            = {2018},
  CODEN           = {SINODQ},
  DOI             = {https://doi.org/10.1145/3200691.3178492},
  ISSN            = {0362-1340 (print), 1523-2867 (print), 1558-1160
                     (electronic)},
  ISSN-L          = {0362-1340},
  bibdate         = {Wed Oct 16 14:12:56 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract        = {Scientific applications with single instruction,
                     multiple data (SIMD) computations show considerable
                     performance improvements when run on today's graphics
                     processing units (GPUs). However, the existence of data
                     dependences across thread blocks may significantly
                     impact the speedup by requiring global synchronization
                     across multiprocessors (SMs) inside the GPU. To
                     efficiently run applications with interblock data
                     dependences, we need fine-granular task-based execution
                     models that will treat SMs inside a GPU as stand-alone
                     parallel processing units. Such a scheme will enable
                     faster execution by utilizing all internal computation
                     elements inside the GPU and eliminating unnecessary
                     waits during device-wide global barriers. In this
                     paper, we propose Juggler, a task-based execution
                     scheme for GPU workloads with data dependences. The
                     Juggler framework takes applications embedding OpenMP
                     4.5 tasks as input and executes them on the GPU via an
                     efficient in-device runtime, hence eliminating the need
                     for kernel-wide global synchronization. Juggler
                     requires no or little modification to the source code,
                     and once launched, the runtime entirely runs on the GPU
                     without relying on the host through the entire
                     execution. We have evaluated Juggler on an NVIDIA Tesla
                     P100 GPU and obtained up to 31\% performance
                     improvement against global barrier based
                     implementation, with minimal runtime overhead.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGPLAN Notices},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark          = {PPoPP '18 proceedings.},
}
@Article{Benedict:2018:SES,
  author          = {Shajulin Benedict},
  title           = {{SCALE-EA}: A Scalability Aware Performance Tuning
                     Framework for {OpenMP} Applications},
  journal         = j-SCPE,
  volume          = {19},
  number          = {1},
  pages           = {15--30},
  month           = {????},
  year            = {2018},
  CODEN           = {????},
  ISSN            = {1895-1767},
  ISSN-L          = {1895-1767},
  bibdate         = {Mon Jan 7 06:46:50 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/scpe.bib},
  URL             = {https://www.scpe.org/index.php/scpe/article/view/1390},
  acknowledgement = ack-nhfb,
  fjournal        = {Scalable Computing: Practice and Experience},
  journal-URL     = {http://www.scpe.org/},
}
@Article{Burtscher:2018:HQF,
  author          = {Martin Burtscher and Sindhu Devale and Sahar Azimi and
                     Jayadharini Jaiganesh and Evan Powers},
  title           = {A High-Quality and Fast Maximal Independent Set
                     Implementation for {GPUs}},
  journal         = j-TOPC,
  volume          = {5},
  number          = {2},
  pages           = {8:1--8:??},
  month           = jan,
  year            = {2018},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3291525},
  ISSN            = {2329-4949 (print), 2329-4957 (electronic)},
  ISSN-L          = {2329-4949},
  bibdate         = {Wed Jan 23 16:12:26 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/topc.bib},
  abstract        = {Computing a maximal independent set is an important
                     step in many parallel graph algorithms. This article
                     introduces ECL-MIS, a maximal independent set
                     implementation that works well on GPUs. It includes key
                     optimizations to speed up computation, reduce the
                     memory footprint, and increase the set size. Its CUDA
                     implementation requires fewer than 30 kernel
                     statements, runs asynchronously, and produces a
                     deterministic result. It outperforms the maximal
                     independent set implementations of Pannotia, CUSP, and
                     IrGL on each of the 16 tested graphs of various types
                     and sizes. On a Titan X GPU, ECL-MIS is between 3.9 and
                     100 times faster (11.5 times, on average). ECL-MIS
                     running on the GPU is also faster than the parallel CPU
                     codes Ligra, Ligra+, and PBBS running on 20 Xeon cores,
                     which it outperforms by 4.1 times, on average. At the
                     same time, ECL-MIS produces maximal independent sets
                     that are up to 52\% larger (over 10\%, on average)
                     compared to these preexisting CPU and GPU
                     implementations. Whereas these codes produce maximal
                     independent sets that are, on average, about 15\%
                     smaller than the largest possible such sets, ECL-MIS
                     sets are less than 6\% smaller than the maximum
                     independent sets.},
  acknowledgement = ack-nhfb,
  articleno       = {8},
  fjournal        = {ACM Transactions on Parallel Computing},
  journal-URL     = {http://dl.acm.org/citation.cfm?id=2632163},
}
@Article{Bylina:2018:EEO,
  author          = {Beata Bylina and Jaroslaw Bylina},
  title           = {An Experimental Evaluation of the {OpenMP} Thread
                     Mapping for {LU} Factorisation on {Xeon Phi}
                     Coprocessor and on Hybrid {CPU-MIC} Platform},
  journal         = j-SCPE,
  volume          = {19},
  number          = {3},
  pages           = {259--274},
  month           = {????},
  year            = {2018},
  CODEN           = {????},
  ISSN            = {1895-1767},
  ISSN-L          = {1895-1767},
  bibdate         = {Mon Jan 7 06:46:50 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/scpe.bib},
  URL             = {https://www.scpe.org/index.php/scpe/article/view/1373},
  acknowledgement = ack-nhfb,
  fjournal        = {Scalable Computing: Practice and Experience},
  journal-URL     = {http://www.scpe.org/},
}
@Article{Castello:2018:EIR,
  author          = {Adri{\'a}n Castell{\'o} and Antonio J. Pe{\~n}a and
                     Rafael Mayo and Judit Planas and Enrique S.
                     Quintana-Ort{\'{\i}} and Pavan Balaji},
  title           = {Exploring the interoperability of remote {GPGPU}
                     virtualization using {rCUDA} and directive-based
                     programming models},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {74},
  number          = {11},
  pages           = {5628--5642},
  month           = nov,
  year            = {2018},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1007/s11227-016-1791-y},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Thu Oct 10 15:31:09 MDT 2019},
  bibsource       = {http://link.springer.com/journal/11227/74/11;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
}
@Article{Chen:2018:FOB,
  author          = {Cen Chen and Kenli Li and Aijia Ouyang and Keqin Li},
  title           = {{FlinkCL}: An {OpenCL}-Based In-Memory Computing
                     Architecture on Heterogeneous {CPU--GPU} Clusters for
                     Big Data},
  journal         = j-IEEE-TRANS-COMPUT,
  volume          = {67},
  number          = {12},
  pages           = {1765--1779},
  month           = dec,
  year            = {2018},
  CODEN           = {ITCOB4},
  DOI             = {https://doi.org/10.1109/TC.2018.2839719},
  ISSN            = {0018-9340 (print), 1557-9956 (electronic)},
  ISSN-L          = {0018-9340},
  bibdate         = {Thu Nov 8 07:18:03 MST 2018},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {https://ieeexplore.ieee.org/document/8362980/},
  acknowledgement = ack-nhfb,
  fjournal        = {IEEE Transactions on Computers},
  journal-URL     = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12},
}
@Article{Clay:2018:GAP,
  author          = {M. P. Clay and D. Buaria and P. K. Yeung and T.
                     Gotoh},
  title           = {{GPU} acceleration of a petascale application for
                     turbulent mixing at high {Schmidt} number using {OpenMP
                     4.5}},
  journal         = j-COMP-PHYS-COMM,
  volume          = {228},
  number          = {??},
  pages           = {100--114},
  month           = jul,
  year            = {2018},
  CODEN           = {CPHCBZ},
  DOI             = {https://doi.org/10.1016/j.cpc.2018.02.020},
  ISSN            = {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L          = {0010-4655},
  bibdate         = {Thu May 31 14:21:46 MDT 2018},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0010465518300596},
  acknowledgement = ack-nhfb,
  fjournal        = {Computer Physics Communications},
  journal-URL     = {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Cowles:2018:ISB,
  author          = {Mary Kathryn Cowles and Stephen Bonett and Michael
                     Seedorff},
  title           = {Independent sampling for {Bayesian} normal conditional
                     autoregressive models with {OpenCL} acceleration},
  journal         = j-COMP-STAT,
  volume          = {33},
  number          = {1},
  pages           = {159--177},
  month           = mar,
  year            = {2018},
  CODEN           = {CSTAEB},
  DOI             = {https://doi.org/10.1007/s00180-017-0752-0},
  ISSN            = {0943-4062 (print), 1613-9658 (electronic)},
  ISSN-L          = {0943-4062},
  bibdate         = {Thu Jun 18 16:19:50 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/compstat.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://link.springer.com/article/10.1007/s00180-017-0752-0},
  acknowledgement = ack-nhfb,
  ajournal        = {Comp. Stat.},
  fjournal        = {Computational Statistics},
  journal-URL     = {http://link.springer.com/journal/180},
}
@Article{Davina:2018:MCP,
  author          = {A. Lamas Davi{\~n}a and J. E. Roman},
  title           = {{MPI-CUDA} parallel linear solvers for
                     block-tridiagonal matrices in the context of {SLEPc}'s
                     eigensolvers},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {74},
  number          = {??},
  pages           = {118--135},
  month           = {????},
  year            = {2018},
  CODEN           = {PACOEJ},
  DOI             = {https://doi.org/10.1016/j.parco.2017.11.006},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Tue Apr 3 13:55:32 MDT 2018},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0167819117301874},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Dieguez:2018:SLP,
  author          = {Adri{\'a}n P{\'e}rez Di{\'e}guez and Margarita Amor
                     and Jacobo Lobeiras and Ram{\'o}n Doallo},
  title           = {Solving Large Problem Sizes of Index-Digit Algorithms
                     on {GPU}: {FFT} and Tridiagonal System Solvers},
  journal         = j-IEEE-TRANS-COMPUT,
  volume          = {67},
  number          = {1},
  pages           = {86--101},
  month           = jan,
  year            = {2018},
  CODEN           = {ITCOB4},
  DOI             = {https://doi.org/10.1109/TC.2017.2723879},
  ISSN            = {0018-9340 (print), 1557-9956 (electronic)},
  ISSN-L          = {0018-9340},
  bibdate         = {Thu Dec 14 07:11:27 2017},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://ieeexplore.ieee.org/document/7970194/},
  acknowledgement = ack-nhfb,
  fjournal        = {IEEE Transactions on Computers},
  journal-URL     = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12},
  keywords        = {Computer architecture; CUDA; CUSPARSE; FFT; GPU;
                     Graphics processing units; Instruction sets; Kernel;
                     medium problem sizes; Proposals; Signal processing
                     algorithms; Synchronization; tridiagonal systems;
                     tuning},
}
@Article{Eddelbuettel:2018:BRN,
author = "Dirk Eddelbuettel",
title = "Book Review: {Norman Matloff. \booktitle{Parallel
Computing for Data Science: With Examples in R, C++,
and CUDA}. Boca Raton: CRC Press}",
journal = j-BIOMETRICS,
volume = "74",
number = "2",
pages = "770--770",
month = jun,
year = "2018",
CODEN = "BIOMB6",
DOI = "https://doi.org/10.1111/biom.12896",
ISSN = "0006-341X (print), 1541-0420 (electronic)",
ISSN-L = "0006-341X",
bibdate = "Thu Jun 25 10:48:44 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/biometrics2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/s-plus.bib",
acknowledgement = ack-nhfb,
ajournal = "Biometrics",
fjournal = "Biometrics",
journal-URL = "http://onlinelibrary.wiley.com/journal/10.1111/(ISSN)1541-0420",
onlinedate = "26 June 2018",
}
@Article{Faraji:2018:DCG,
author = "Iman Faraji and Ahmad Afsahi",
title = "Design considerations for {GPU}-aware collective
communications in {MPI}",
journal = j-CCPE,
volume = "30",
number = "17",
pages = "e4667:1--e4667:??",
day = "10",
month = sep,
year = "2018",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.4667",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Thu Mar 28 08:07:51 MDT 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "18 May 2018",
}
@Article{Ferreira:2018:CMM,
author = "Kurt B. Ferreira and Scott Levy and Kevin Pedretti and
Ryan E. Grant",
title = "Characterizing {MPI} matching via trace-based
simulation",
journal = j-PARALLEL-COMPUTING,
volume = "77",
number = "??",
pages = "57--83",
month = sep,
year = "2018",
CODEN = "PACOEJ",
DOI = "https://doi.org/10.1016/j.parco.2018.05.005",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Mon Jan 7 15:25:20 MST 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819118301467",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Gallardo:2018:EMM,
author = "Esthela Gallardo and J{\'e}r{\^o}me Vienne and
Leonardo Fialho and Patricia Teller and James Browne",
title = "Employing {MPI\_T} in {MPI} Advisor to optimize
application performance",
journal = j-IJHPCA,
volume = "32",
number = "6",
pages = "882--896",
day = "1",
month = nov,
year = "2018",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342016684005",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Wed Oct 9 14:35:52 MDT 2019",
bibsource = "http://hpc.sagepub.com/;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://journals.sagepub.com/doi/full/10.1177/1094342016684005",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "https://journals.sagepub.com/home/hpc",
}
@Article{Gerbessiotis:2018:SIS,
author = "Alexandros V. Gerbessiotis",
title = "A Study of Integer Sorting on Multicores",
journal = j-PARALLEL-PROCESS-LETT,
volume = "28",
number = "04",
pages = "??--??",
month = dec,
year = "2018",
DOI = "https://doi.org/10.1142/S0129626418500147",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
ISSN-L = "0129-6264",
bibdate = "Mon Mar 29 12:30:05 MDT 2021",
bibsource = "http://ejournals.wspc.com.sg/ppl/;
http://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.worldscientific.com/doi/10.1142/S0129626418500147",
abstract = "Integer sorting on multicores and GPUs can be realized
by a variety of approaches that include variants of
distribution-based methods such as radix-sort,
comparison-oriented algorithms such as deterministic
regular sampling and random sampling parallel sorting,
and network-based algorithms such as Batcher's bitonic
sorting algorithm. In this work we present an
experimental study of integer sorting on multicore
processors. We have implemented serial and parallel
radix-sort for various radixes, deterministic regular
oversampling, and random oversampling parallel sorting,
including new variants of ours, and also some
previously little explored or unexplored variants of
bitonic-sort and odd-even transposition sort. The study
uses multithreading and multiprocessing parallel
programming libraries with the same C language code
working under Open MPI, MulticoreBSP, and BSPlib. We
first provide some general high-level observations on
the performance of these implementations. If we can
conclude anything is that accurate prediction of
performance by taking into consideration architecture
dependent features such as the structure and
characteristics of multiple memory hierarchies is
difficult and more often than not untenable. To some
degree this is affected by the overhead imposed by the
high-level library used in the programming effort.
Another objective is to model the performance of these
algorithms and their implementations under the MBSP
(Multi-memory BSP) model. Despite the limitations
mentioned above, we can still draw some reliable
conclusions and reason about the performance of these
implementations using the MBSP model, thus making MBSP
useful and usable.",
acknowledgement = ack-nhfb,
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@Article{Gerstenberger:2018:EHS,
author = "Robert Gerstenberger and Maciej Besta and Torsten
Hoefler",
title = "Enabling highly scalable remote memory access
programming with {MPI-3} one sided",
journal = j-CACM,
volume = "61",
number = "10",
pages = "106--113",
month = oct,
year = "2018",
CODEN = "CACMA2",
DOI = "https://doi.org/10.1145/3264413",
ISSN = "0001-0782 (print), 1557-7317 (electronic)",
ISSN-L = "0001-0782",
bibdate = "Thu Sep 27 11:55:45 MDT 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/cacm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://cacm.acm.org/magazines/2018/10/231375/fulltext",
abstract = "Modern high-performance networks offer remote direct
memory access (RDMA) that exposes a process' virtual
address space to other processes in the network. The
Message Passing Interface (MPI) specification has
recently been extended with a programming interface
called MPI-3 Remote Memory Access (MPI-3 RMA) for
efficiently exploiting state-of-the-art RDMA features.
MPI-3 RMA enables a powerful programming model that
alleviates many message passing downsides. In this
work, we design and develop bufferless protocols that
demonstrate how to implement this interface and support
scaling to millions of cores with negligible memory
consumption while providing highest performance and
minimal overheads. To arm programmers, we provide a
spectrum of performance models for RMA functions that
enable rigorous mathematical analysis of application
performance and facilitate the development of codes
that solve given tasks within specified time and energy
budgets. We validate the usability of our library and
models with several application studies with up to half
a million processes. In a wider sense, our work
illustrates how to use RMA principles to accelerate
computation- and data-intensive codes.",
acknowledgement = ack-nhfb,
fjournal = "Communications of the ACM",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J79",
}
@Article{Gianinazzi:2018:CAP,
author = "Lukas Gianinazzi and Pavel Kalvoda and Alessandro {De
Palma} and Maciej Besta and Torsten Hoefler",
title = "Communication-avoiding parallel minimum cuts and
connected components",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "219--232",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178504",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "We present novel scalable parallel algorithms for
finding global minimum cuts and connected components,
which are important and fundamental problems in graph
processing. To take advantage of future massively
parallel architectures, our algorithms are
communication-avoiding: they reduce the costs of
communication across the network and the cache
hierarchy. The fundamental technique underlying our
work is the randomized sparsification of a graph:
removing a fraction of graph edges, deriving a solution
for such a sparsified graph, and using the result to
obtain a solution for the original input. We design and
implement sparsification with $O(1)$ synchronization
steps. Our global minimum cut algorithm decreases
communication costs and computation compared to the
state-of-the-art, while our connected components
algorithm incurs few cache misses and synchronization
steps. We validate our approach by evaluating MPI
implementations of the algorithms on a petascale
supercomputer. We also provide an approximate variant
of the minimum cut algorithm and show that it
approximates the exact solutions well while using a
fraction of cores in a fraction of time.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PPoPP '18 proceedings.",
}
@Article{Goglin:2018:HTM,
author = "Brice Goglin and Emmanuel Jeannot and Farouk Mansouri
and Guillaume Mercier",
title = "Hardware topology management in {MPI} applications
through hierarchical communicators",
journal = j-PARALLEL-COMPUTING,
volume = "76",
number = "??",
pages = "70--90",
month = aug,
year = "2018",
CODEN = "PACOEJ",
DOI = "https://doi.org/10.1016/j.parco.2018.05.006",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Mon Jun 4 07:40:18 MDT 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819118301480",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Gomez-Folgar:2018:MPA,
author = "F. Gomez-Folgar and G. Indalecio and N. Seoane and T.
F. Pena and A. J. Garcia-Loureiro",
title = "{MPI-Performance-Aware-Reallocation}: method to
optimize the mapping of processes applied to a cloud
infrastructure",
journal = j-COMPUTING,
volume = "100",
number = "2",
pages = "211--226",
month = feb,
year = "2018",
CODEN = "CMPTA2",
DOI = "https://doi.org/10.1007/s00607-017-0573-6",
ISSN = "0010-485X (print), 1436-5057 (electronic)",
ISSN-L = "0010-485X",
bibdate = "Wed Nov 7 08:19:16 MST 2018",
bibsource = "http://link.springer.com/journal/607/100/2;
http://www.math.utah.edu/pub/tex/bib/computing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "Computing",
journal-URL = "http://link.springer.com/journal/607",
}
@Article{Gonzalez-Dominguez:2018:MPC,
author = "Jorge Gonzalez-Dominguez and Maria J. Martin",
title = "{MPIGeneNet}: Parallel Calculation of Gene
Co-Expression Networks on Multicore Clusters",
journal = j-TCBB,
volume = "15",
number = "5",
pages = "1732--1737",
month = sep,
year = "2018",
CODEN = "ITCBCY",
DOI = "https://doi.org/10.1109/TCBB.2017.2761340",
ISSN = "1545-5963 (print), 1557-9964 (electronic)",
ISSN-L = "1545-5963",
bibdate = "Thu Nov 8 06:18:46 MST 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/tcbb.bib",
abstract = "In this work, we present MPIGeneNet, a parallel tool
that applies Pearson's correlation and Random Matrix
Theory to construct gene co-expression networks. It is
based on the state-of-the-art sequential tool
RMTGeneNet, which provides networks with high
robustness and sensitivity at the expenses of
relatively long runtimes for large scale input
datasets. MPIGeneNet returns the same results as
RMTGeneNet but improves the memory management, reduces
the I/O cost, and accelerates the two most
computationally demanding steps of co-expression
network construction by exploiting the compute
capabilities of common multicore CPU clusters. Our
performance evaluation on two different systems using
three typical input datasets shows that MPIGeneNet is
significantly faster than RMTGeneNet. As an example,
our tool is up to 175.41 times faster on a cluster with
eight nodes, each one containing two 12-core Intel
Haswell processors. The source code of MPIGeneNet, as
well as a reference manual, are available at
https://sourceforge.net/projects/mpigenenet/.",
acknowledgement = ack-nhfb,
fjournal = "IEEE/ACM Transactions on Computational Biology and
Bioinformatics",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J954",
}
@Article{Gupta:2018:ALQ,
author = "Sourendu Gupta and Pushan Majumdar",
title = "Accelerating lattice {QCD} simulations with 2 flavors
of staggered fermions on multiple {GPUs} using
{OpenACC} --- a first attempt",
journal = j-COMP-PHYS-COMM,
volume = "228",
number = "??",
pages = "44--53",
month = jul,
year = "2018",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/j.cpc.2018.03.008",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Thu May 31 14:21:46 MDT 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465518300808",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Halver:2018:FPM,
author = "Rene Halver and Wilhelm Homberg and Godehard Sutmann",
title = "Function portability of molecular dynamics on
heterogeneous parallel architectures with {OpenCL}",
journal = j-J-SUPERCOMPUTING,
volume = "74",
number = "4",
pages = "1522--1533",
month = apr,
year = "2018",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-017-2232-2",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Thu Oct 10 15:31:11 MDT 2019",
bibsource = "http://link.springer.com/journal/11227/74/4;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Huang:2018:ACO,
author = "Kai Huang and Biao Hu and Long Chen and Alois Knoll
and Zhihua Wang",
title = "{ADAS} on {COTS} with {OpenCL}: A Case Study with Lane
Detection",
journal = j-IEEE-TRANS-COMPUT,
volume = "67",
number = "4",
pages = "559--565",
month = apr,
year = "2018",
CODEN = "ITCOB4",
DOI = "https://doi.org/10.1109/TC.2017.2759203",
ISSN = "0018-9340 (print), 1557-9956 (electronic)",
ISSN-L = "0018-9340",
bibdate = "Thu Mar 15 08:52:31 MDT 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://ieeexplore.ieee.org/document/8057795/",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Computers",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Imbernon:2018:ELS,
author = "Baldomero Imbern{\'o}n and Javier Prades and Domingo
Gim{\'e}nez and Jos{\'e} M. Cecilia and Federico
Silla",
title = "Enhancing large-scale docking simulation on
heterogeneous systems: An {MPI} vs {rCUDA} study",
journal = j-FUT-GEN-COMP-SYS,
volume = "79 (part 1)",
number = "??",
pages = "26--37",
year = "2018",
CODEN = "FGSEVI",
DOI = "https://doi.org/10.1016/j.future.2017.08.050",
ISSN = "0167-739X (print), 1872-7115 (electronic)",
ISSN-L = "0167-739X",
bibdate = "Fri Nov 24 15:16:17 2017",
bibsource = "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.sciencedirect.com/science/article/pii/S0167739X17309974",
acknowledgement = ack-nhfb,
fjournal = "Future Generation Computer Systems",
journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
keywords = "Heterogeneous computing; HPC; Metaheuristics; rCUDA;
Virtual screening",
}
@Article{Jambunathan:2018:COB,
author = "Revathi Jambunathan and Deborah A. Levin",
title = "{CHAOS}: an octree-based {PIC--DSMC} code for modeling
of electron kinetic properties in a plasma plume using
{MPI--CUDA} parallelization",
journal = j-J-COMPUT-PHYS,
volume = "373",
number = "??",
pages = "571--604",
day = "15",
month = nov,
year = "2018",
CODEN = "JCTPAH",
DOI = "https://doi.org/10.1016/j.jcp.2018.07.005",
ISSN = "0021-9991 (print), 1090-2716 (electronic)",
ISSN-L = "0021-9991",
bibdate = "Thu Sep 20 17:02:49 MDT 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jcomputphys2015.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0021999118304601",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computational Physics",
journal-URL = "http://www.sciencedirect.com/science/journal/00219991",
}
@Article{Kamburugamuve:2018:AML,
author = "Supun Kamburugamuve and Pulasthi Wickramasinghe and
Saliya Ekanayake and Geoffrey C. Fox",
title = "Anatomy of machine learning algorithm implementations
in {MPI}, {Spark}, and {Flink}",
journal = j-IJHPCA,
volume = "32",
number = "1",
pages = "61--73",
month = jan,
year = "2018",
CODEN = "IHPCFL",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Sat Jan 6 10:32:00 MST 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
journal-URL = "https://journals.sagepub.com/home/hpc",
}
@Article{Kang:2018:PRS,
author = "Zhijiang Kang and Ze Deng and Wei Han and Dongmei
Zhang",
title = "Parallel Reservoir Simulation with {OpenACC} and
Domain Decomposition",
journal = j-ALGORITHMS-BASEL,
volume = "11",
number = "12",
month = dec,
year = "2018",
CODEN = "ALGOCH",
DOI = "https://doi.org/10.3390/a11120213",
ISSN = "1999-4893 (electronic)",
ISSN-L = "1999-4893",
bibdate = "Fri May 3 14:18:56 MDT 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/algorithms.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.mdpi.com/1999-4893/11/12/213",
acknowledgement = ack-nhfb,
articleno = "213",
fjournal = "Algorithms (Basel)",
journal-URL = "https://www.mdpi.com/journal/algorithms",
pagecount = "??",
pubdates = "Received: 16 November 2018 / Revised: 5 December 2018
/ Accepted: 14 December 2018 / Published: 18 December
2018",
}
@Article{Kono:2018:EOW,
author = "Fumiya Kono and Naohito Nakasato and Kensaku Hayashi
and Alexander Vazhenin and Stanislav Sedukhin",
title = "Evaluations of {OpenCL-written} tsunami simulation on
{FPGA} and comparison with {GPU} implementation",
journal = j-J-SUPERCOMPUTING,
volume = "74",
number = "6",
pages = "2747--2775",
month = jun,
year = "2018",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-018-2315-8",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Thu Oct 10 15:31:12 MDT 2019",
bibsource = "http://link.springer.com/journal/11227/74/6;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Kotsifakou:2018:HHP,
author = "Maria Kotsifakou and Prakalp Srivastava and Matthew D.
Sinclair and Rakesh Komuravelli and Vikram Adve and
Sarita Adve",
title = "{HPVM}: heterogeneous parallel virtual machine",
journal = j-SIGPLAN,
volume = "53",
number = "1",
pages = "68--80",
month = jan,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3200691.3178493",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib;
http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "We propose a parallel program representation for
heterogeneous systems, designed to enable performance
portability across a wide range of popular parallel
hardware, including GPUs, vector instruction sets,
multicore CPUs and potentially FPGAs. Our
representation, which we call HPVM, is a hierarchical
dataflow graph with shared memory and vector
instructions. HPVM supports three important
capabilities for programming heterogeneous systems: a
compiler intermediate representation (IR), a virtual
instruction set (ISA), and a basis for runtime
scheduling; previous systems focus on only one of these
capabilities. As a compiler IR, HPVM aims to enable
effective code generation and optimization for
heterogeneous systems. As a virtual ISA, it can be used
to ship executable programs, in order to achieve both
functional portability and performance portability
across such systems. At runtime, HPVM enables flexible
scheduling policies, both through the graph structure
and the ability to compile individual nodes in a
program to any of the target devices on a system. We
have implemented a prototype HPVM system, defining the
HPVM IR as an extension of the LLVM compiler IR,
compiler optimizations that operate directly on HPVM
graphs, and code generators that translate the virtual
ISA to NVIDIA GPUs, Intel's AVX vector units, and to
multicore X86-64 processors. Experimental results show
that HPVM optimizations achieve significant performance
improvements, HPVM translators achieve performance
competitive with manually developed OpenCL code for
both GPUs and vector hardware, and that runtime
scheduling policies can make use of both program and
runtime information to exploit the flexible compilation
capabilities. Overall, we conclude that the HPVM
representation is a promising basis for achieving
performance portability and for implementing
parallelizing compilers for heterogeneous parallel
systems.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PPoPP '18 proceedings.",
}
@Article{Li:2018:CER,
author = "Xiangbo Li and Mohsen Amini Salehi and Magdy Bayoumi
and Nian-Feng Tzeng and Rajkumar Buyya",
title = "Cost-Efficient and Robust On-Demand Video Transcoding
Using Heterogeneous Cloud Services",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "29",
number = "3",
pages = "556--571",
month = mar,
year = "2018",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2017.2766069",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Thu Feb 15 06:03:25 MST 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://ieeexplore.ieee.org/document/8081853/",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
}
@Article{Li:2018:COM,
author = "Shigang Li and Yunquan Zhang and Torsten Hoefler",
title = "Cache-Oblivious {MPI} All-to-All Communications Based
on {Morton} Order",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "29",
number = "3",
pages = "542--555",
month = mar,
year = "2018",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2017.2768413",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Thu Feb 15 06:03:25 MST 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://ieeexplore.ieee.org/document/8091010/",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
}
@Article{Liang:2018:FMP,
author = "Yun Liang and Shuo Wang and Wei Zhang",
title = "{FlexCL}: A Model of Performance and Power for
{OpenCL} Workloads on {FPGAs}",
journal = j-IEEE-TRANS-COMPUT,
volume = "67",
number = "12",
pages = "1750--1764",
month = dec,
year = "2018",
CODEN = "ITCOB4",
DOI = "https://doi.org/10.1109/TC.2018.2840686",
ISSN = "0018-9340 (print), 1557-9956 (electronic)",
ISSN-L = "0018-9340",
bibdate = "Thu Nov 8 07:18:03 MST 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://ieeexplore.ieee.org/document/8365849/",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Computers",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Lin:2018:CHM,
author = "Han Lin and Zhichao Su and Xiandong Meng and Xu Jin
and Zhong Wang and Wenting Han and Hong An and Mengxian
Chi and Zheng Wu",
title = "Combining {Hadoop} with {MPI} to Solve Metagenomics
Problems that are both Data- and Compute-intensive",
journal = j-INT-J-PARALLEL-PROG,
volume = "46",
number = "4",
pages = "762--775",
month = aug,
year = "2018",
CODEN = "IJPPE5",
DOI = "https://doi.org/10.1007/s10766-017-0524-z",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Fri Oct 11 08:37:50 MDT 2019",
bibsource = "http://link.springer.com/journal/10766/46/4;
http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
}
@InProceedings{Malakhov:2018:CMT,
author = "Anton Malakhov and David Liu and Anton Gorshkov and
Terry Wilmarth",
editor = "Fatih Akici and David Lippa and Dillon Niederhut and
M. Pacer",
booktitle = "Proceedings of the {17th Python in Science Conference,
Austin, TX, 9--15 July 2018}",
title = "Composable Multi-Threading and Multi-Processing for
Numeric Libraries",
publisher = "????",
address = "????",
pages = "15--21",
year = "2018",
bibdate = "Wed Aug 1 09:03:36 2018",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/python.bib",
URL = "http://conference.scipy.org/proceedings/scipy2018/anton_malakhov.html",
abstract = "Python is popular among scientific communities that
value its simplicity and power, especially as it comes
along with numeric libraries such as NumPy, SciPy,
Dask, and Numba. As CPU core counts keep increasing,
these modules can make use of many cores via
multi-threading for efficient multi-core parallelism.
However, threads can interfere with each other leading
to overhead and inefficiency if used together in a
single application on machines with a large number of
cores. This performance loss can be prevented if all
multi-threaded modules are coordinated. This paper
continues the work started in AMala16 by introducing
more approaches to coordination for both
multi-threading and multi-processing cases. In
particular, we investigate the use of static settings,
limiting the number of simultaneously active OpenMP
parallel regions, and optional parallelism with Intel
Threading Building Blocks (Intel TBB). We will show how
these approaches help to unlock additional performance
for numeric applications on multi-core systems.",
acknowledgement = ack-nhfb,
keywords = "Dask; GIL; Joblib; Multi-core; Multi-processing;
Multi-threading; Nested Parallelism; NumPy; OpenMP;
Oversubscription; Parallel Computations; Python; SciPy;
TBB",
}
@Article{Maleki:2018:AHP,
author = "Sepideh Maleki and Martin Burtscher",
title = "Automatic Hierarchical Parallelization of Linear
Recurrences",
journal = j-SIGPLAN,
volume = "53",
number = "2",
pages = "128--138",
month = feb,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296957.3173168",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:56 MDT 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "Linear recurrences encompass many fundamental
computations including prefix sums and digital filters.
Later result values depend on earlier result values in
recurrences, making it a challenge to compute them in
parallel. We present a new work- and space-efficient
algorithm to compute linear recurrences that is
amenable to automatic parallelization and suitable for
hierarchical massively-parallel architectures such as
GPUs. We implemented our approach in a domain-specific
code generator that emits optimized CUDA code. Our
evaluation shows that, for standard prefix sums and
single-stage IIR filters, the generated code reaches
the throughput of memory copy for large inputs, which
cannot be surpassed. On higher-order prefix sums, it
performs nearly as well as the fastest handwritten code
from the literature. On tuple-based prefix sums and
digital filters, our automatically parallelized code
outperforms the fastest prior implementations.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "ASPLOS '18 proceedings.",
}
@Article{Malinowski:2018:SIP,
author = "Artur Malinowski and Pawel Czarnul",
title = "A Solution to Image Processing with Parallel {MPI}
{I/O} and Distributed {NVRAM} Cache",
journal = j-SCPE,
volume = "19",
number = "1",
pages = "1--14",
month = "????",
year = "2018",
CODEN = "????",
ISSN = "1895-1767",
ISSN-L = "1895-1767",
bibdate = "Mon Jan 7 06:46:50 MST 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/scpe.bib",
URL = "https://www.scpe.org/index.php/scpe/article/view/1389",
acknowledgement = ack-nhfb,
fjournal = "Scalable Computing: Practice and Experience",
journal-URL = "http://www.scpe.org/",
}
@Article{Moll:2018:PCF,
author = "Simon Moll and Sebastian Hack",
title = "Partial control-flow linearization",
journal = j-SIGPLAN,
volume = "53",
number = "4",
pages = "543--556",
month = apr,
year = "2018",
CODEN = "SINODQ",
DOI = "https://doi.org/10.1145/3296979.3192413",
ISSN = "0362-1340 (print), 1523-2867 (print), 1558-1160
(electronic)",
ISSN-L = "0362-1340",
bibdate = "Wed Oct 16 14:12:57 MDT 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
abstract = "If-conversion is a fundamental technique for
vectorization. It accounts for the fact that in a SIMD
program, several targets of a branch might be executed
because of divergence. Especially for irregular
data-parallel workloads, it is crucial to avoid
if-converting non-divergent branches to increase SIMD
utilization. In this paper, we present partial
linearization, a simple and efficient if-conversion
algorithm that overcomes several limitations of
existing if-conversion techniques. In contrast to prior
work, it has provable guarantees on which non-divergent
branches are retained and will never duplicate code or
insert additional branches. We show how our algorithm
can be used in a classic loop vectorizer as well as to
implement data-parallel languages such as ISPC or
OpenCL. Furthermore, we implement prior vectorizer
optimizations on top of partial linearization in a more
general way. We evaluate the implementation of our
algorithm in LLVM on a range of irregular data
analytics kernels, a neutronics simulation benchmark
and NAB, a molecular dynamics benchmark from SPEC2017
on AVX2, AVX512, and ARM Advanced SIMD machines and
report speedups of up to 146 \% over ICC, GCC and Clang
O3.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGPLAN Notices",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J706",
remark = "PLDI '18 proceedings.",
}
@Article{Monteiro:2018:EGC,
  author =       "Felipe R. Monteiro and Erickson H. da S. Alves and
                 Isabela S. Silva and Hussama I. Ismail and Lucas C.
                 Cordeiro and Eddie B. de Lima Filho",
  title =        "{ESBMC-GPU}: a context-bounded model checking tool to
                 verify {CUDA} programs",
  journal =      j-SCI-COMPUT-PROGRAM,
  volume =       "152",
  number =       "??",
  pages =        "63--69",
  day =          "15",
  month =        jan,
  year =         "2018",
  CODEN =        "SCPGD4",
  ISSN =         "0167-6423 (print), 1872-7964 (electronic)",
  ISSN-L =       "0167-6423",
  bibdate =      "Sat Dec 2 17:23:38 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/scicomputprogram.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167642317301934",
  acknowledgement = ack-nhfb,
  fjournal =     "Science of Computer Programming",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01676423",
}
@Article{Peng:2018:CDC,
  author =       "Yuanfeng Peng and Vinod Grover and Joseph Devietti",
  title =        "{CURD}: a dynamic {CUDA} race detector",
  journal =      j-SIGPLAN,
  volume =       "53",
  number =       "4",
  pages =        "390--403",
  month =        apr,
  year =         "2018",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3296979.3192368",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Oct 16 14:12:57 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "As GPUs have become an integral part of nearly every
                 processor, GPU programming has become increasingly
                 popular. GPU programming requires a combination of
                 extreme levels of parallelism and low-level
                 programming, making it easy for concurrency bugs such
                 as data races to arise. These concurrency bugs can be
                 extremely subtle and difficult to debug due to the
                 massive numbers of threads running concurrently on a
                 modern GPU. While some tools exist to detect data races
                 in GPU programs, they are often prohibitively slow or
                 focused only on a small class of data races in shared
                 memory. Compared to prior work, our race detector,
                 CURD, can detect data races precisely on both shared
                 and global memory, selects an appropriate race
                 detection algorithm based on the synchronization used
                 in a program, and utilizes efficient compiler
                 instrumentation to reduce performance overheads. Across
                 53 benchmarks, we find that using CURD incurs an
                 average slowdown of just 2.88x over native execution. CURD
                 is 2.1x faster than Nvidia's CUDA-Racecheck race
                 detector, despite detecting a much broader class of
                 races. CURD finds 35 races across our benchmarks,
                 including bugs in established benchmark suites and in
                 sample programs from Nvidia.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PLDI '18 proceedings.",
}
@Article{Pessoa:2018:GAB,
  author =       "Tiago Carneiro Pessoa and Jan Gmys and Francisco Heron
                 de Carvalho J{\'u}nior and Nouredine Melab and Daniel
                 Tuyttens",
  title =        "{GPU}-accelerated backtracking using {CUDA Dynamic
                 Parallelism}",
  journal =      j-CCPE,
  volume =       "30",
  number =       "9",
  pages =        "e4374:1--e4374:??",
  day =          "10",
  month =        may,
  year =         "2018",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4374",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Aug 4 10:03:13 MDT 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://onlinelibrary.wiley.com/doi/abs/10.1002/cpe.4374",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Pierro:2018:SFP,
  author =       "Vincenzo Pierro and Luigi Troiano and Elena Mejuto and
                 Giovanni Filatrella",
  title =        "Stochastic first passage time accelerated with
                 {CUDA}",
  journal =      j-J-COMPUT-PHYS,
  volume =       "361",
  number =       "??",
  pages =        "136--149",
  day =          "15",
  month =        may,
  year =         "2018",
  CODEN =        "JCTPAH",
  DOI =          "https://doi.org/10.1016/j.jcp.2018.01.039",
  ISSN =         "0021-9991 (print), 1090-2716 (electronic)",
  ISSN-L =       "0021-9991",
  bibdate =      "Wed Mar 21 16:14:42 MDT 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jcomputphys2015.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0021999118300494",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Computational Physics",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00219991",
}
@Article{Pinho:2018:CTM,
  author =       "Luis Miguel Pinho and Eduardo Qui{\~n}ones and Sara
                 Royuela",
  title =        "Combining the tasklet model with {OpenMP}",
  journal =      j-SIGADA-LETTERS,
  volume =       "38",
  number =       "1",
  pages =        "14--18",
  month =        jun,
  year =         "2018",
  CODEN =        "AALEE5",
  DOI =          "https://doi.org/10.1145/3241950.3241952",
  ISSN =         "0736-721X",
  bibdate =      "Sat Oct 19 17:57:55 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigada.bib",
  abstract =     "Previous workshops have discussed a proposal to
                 augment Ada with fine-grained parallelism, based on the
                 notion of tasklets, a lightweight parallel entity.
                 Recent works have shown the convergence of this model
                 with the OpenMP tasking model and have proposed their
                 coexistence. In this paper we provide a status of the
                 existent works, and describe how these models could be
                 combined.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGADA Ada Letters",
  journal-URL =  "http://portal.acm.org/citation.cfm?id=J32",
}
@Article{Poirier:2018:DAB,
  author =       "Carl Poirier and Benoit Gosselin and Paul Fortier",
  title =        "{DNA} Assembly with {de Bruijn} Graphs Using an {FPGA}
                 Platform",
  journal =      j-TCBB,
  volume =       "15",
  number =       "3",
  pages =        "1003--1009",
  month =        may,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2696522",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Sat Jun 30 09:34:37 MDT 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "This paper presents an FPGA implementation of a DNA
                 assembly algorithm, called Ray, initially developed to
                 run on parallel CPUs. The OpenCL language is used and
                 the focus is placed on modifying and optimizing the
                 original algorithm to better suit the new
                 parallelization tool and the radically different
                 hardware architecture. The results show that the
                 execution time is roughly one fourth that of the CPU
                 and factoring energy consumption yields a tenfold
                 savings.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}
@Article{Prabhu:2018:DRC,
  author =       "Tarun Prabhu and William Gropp",
  title =        "{DAME}: Runtime-compilation for data movement",
  journal =      j-IJHPCA,
  volume =       "32",
  number =       "5",
  pages =        "760--774",
  year =         "2018",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342017695444",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Mon Nov 5 17:34:17 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://journals.sagepub.com/doi/full/10.1177/1094342017695444",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
  xxmonth =      sep,
}
@Article{Ramesh:2018:MPE,
  author =       "Srinivasan Ramesh and Aur{\`e}le Mah{\'e}o and Sameer
                 Shende and Allen D. Malony and Hari Subramoni and Amit
                 Ruhela and Dhabaleswar K. Panda",
  title =        "{MPI} performance engineering with the {MPI} tool
                 interface: the integration of {MVAPICH} and {TAU}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "77",
  number =       "??",
  pages =        "19--37",
  month =        sep,
  year =         "2018",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2018.05.003",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Jan 7 15:25:20 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118301479",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Rasch:2018:MDH,
  author =       "Ari Rasch and Sergei Gorlatch",
  title =        "Multi-dimensional Homomorphisms and Their
                 Implementation in {OpenCL}",
  journal =      j-INT-J-PARALLEL-PROG,
  volume =       "46",
  number =       "1",
  pages =        "101--119",
  month =        feb,
  year =         "2018",
  CODEN =        "IJPPE5",
  DOI =          "https://doi.org/10.1007/s10766-017-0508-z",
  ISSN =         "0885-7458 (print), 1573-7640 (electronic)",
  ISSN-L =       "0885-7458",
  bibdate =      "Sun Feb 11 08:41:13 MST 2018",
  bibsource =    "http://link.springer.com/journal/10766/46/1;
                 http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Parallel Programming",
  journal-URL =  "http://link.springer.com/journal/10766",
}
@Article{Riebler:2018:ACA,
  author =       "Heinrich Riebler and Gavin Vaz and Tobias Kenter and
                 Christian Plessl",
  title =        "Automated code acceleration targeting heterogeneous
                 {OpenCL} devices",
  journal =      j-SIGPLAN,
  volume =       "53",
  number =       "1",
  pages =        "417--418",
  month =        jan,
  year =         "2018",
  CODEN =        "SINODQ",
  DOI =          "https://doi.org/10.1145/3200691.3178534",
  ISSN =         "0362-1340 (print), 1523-2867 (print), 1558-1160
                 (electronic)",
  ISSN-L =       "0362-1340",
  bibdate =      "Wed Oct 16 14:12:56 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib",
  abstract =     "Accelerators can offer exceptional performance
                 advantages. However, programmers need to spend
                 considerable efforts on acceleration, without knowing
                 how sustainable the employed programming models,
                 languages and tools are. To tackle this challenge, we
                 propose and demonstrate a new runtime system called
                 HTrOP that is able to automatically generate and execute
                 OpenCL code from sequential CPU code. HTrOP transforms
                 suitable data-parallel loops into independent
                 OpenCL-typical work-items and handles concrete calls to
                 these devices through a mix of library components and
                 application-specific OpenCL host code. Computational
                 hotspots are identified and can be offloaded to
                 different resources (CPU, GPGPU and Xeon Phi). We
                 demonstrate the potential of HTrOP on a broad set of
                 applications and are able to improve the performance by
                 4.3X on average.",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGPLAN Notices",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J706",
  remark =       "PPoPP '18 proceedings.",
}
@Article{Rivas-Gomez:2018:MWS,
  author =       "Sergio Rivas-Gomez and Roberto Gioiosa and Ivy Bo Peng
                 and Gokcen Kestor and Sai Narasimhamurthy and Erwin
                 Laure and Stefano Markidis",
  title =        "{MPI} windows on storage for {HPC} applications",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "77",
  number =       "??",
  pages =        "38--56",
  month =        sep,
  year =         "2018",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2018.05.007",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Jan 7 15:25:20 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118301571",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Rucci:2018:OOS,
  author =       "Enzo Rucci and Carlos Garcia and Guillermo Botella and
                 Armando E. {De Giusti} and Marcelo Naiouf and Manuel
                 Prieto-Matias",
  title =        "{OSWALD}: {OpenCL} {Smith--Waterman} on {Altera}'s
                 {FPGA} for Large Protein Databases",
  journal =      j-IJHPCA,
  volume =       "32",
  number =       "3",
  pages =        "337--350",
  month =        may,
  year =         "2018",
  CODEN =        "IHPCFL",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Mon Nov 5 17:34:16 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
}
@Article{Schmitt:2018:RHG,
  author =       "Christian Schmitt and Moritz Schmid and Sebastian
                 Kuckuk and Harald K{\"o}stler and J{\"u}rgen Teich and
                 Frank Hannig",
  title =        "Reconfigurable Hardware Generation of Multigrid
                 Solvers with Conjugate Gradient Coarse-Grid Solution",
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       "28",
  number =       "04",
  pages =        "??--??",
  month =        dec,
  year =         "2018",
  DOI =          "https://doi.org/10.1142/S0129626418500160",
  ISSN =         "0129-6264 (print), 1793-642X (electronic)",
  ISSN-L =       "0129-6264",
  bibdate =      "Mon Mar 29 12:30:05 MDT 2021",
  bibsource =    "http://ejournals.wspc.com.sg/ppl/;
                 http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.worldscientific.com/doi/10.1142/S0129626418500160",
  abstract =     "Not only in the field of high-performance computing
                 (HPC), field programmable gate arrays (FPGAs) are a
                 soaringly popular accelerator technology. However, they
                 use a completely different programming paradigm and
                 tool set compared to central processing units (CPUs) or
                 even graphics processing units (GPUs), adding extra
                 development steps and requiring special knowledge,
                 hindering widespread use in scientific computing. To
                 bridge this programmability gap, domain-specific
                 languages (DSLs) are a popular choice to generate
                 low-level implementations from an abstract algorithm
                 description. In this work, we demonstrate our approach
                 for the generation of numerical solver implementations
                 based on the multigrid method for FPGAs from the same
                 code base that is also used to generate code for CPUs
                 using a hybrid parallelization of MPI and OpenMP. Our
                 approach yields in a hardware design that can compute
                 up to 11 V-cycles per second with an input grid size of
                 4096 $ \times $ 4096 and
                 solution on the coarsest using the conjugate gradient
                 (CG) method on a mid-range FPGA, beating vectorized,
                 multi-threaded execution on an Intel Xeon processor.",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Processing Letters",
  journal-URL =  "http://www.worldscientific.com/loi/ppl",
}
@Article{Si:2018:DAA,
  author =       "Min Si and Antonio J. Pena and Jeff Hammond and Pavan
                 Balaji and Masamichi Takagi and Yutaka Ishikawa",
  title =        "Dynamic Adaptable Asynchronous Progress Model for
                 {MPI} {RMA} Multiphase Applications",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "29",
  number =       "9",
  pages =        "1975--1989",
  month =        sep,
  year =         "2018",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2018.2815568",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Aug 9 10:52:00 MDT 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.computer.org/csdl/trans/td/2018/09/08315136-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Snir:2018:FMT,
  author =       "Marc Snir",
  title =        "The future of {MPI}: technical perspective",
  journal =      j-CACM,
  volume =       "61",
  number =       "10",
  pages =        "105--105",
  month =        oct,
  year =         "2018",
  CODEN =        "CACMA2",
  DOI =          "https://doi.org/10.1145/3264415",
  ISSN =         "0001-0782 (print), 1557-7317 (electronic)",
  ISSN-L =       "0001-0782",
  bibdate =      "Thu Sep 27 11:55:45 MDT 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/cacm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://cacm.acm.org/magazines/2018/10/231376/fulltext",
  acknowledgement = ack-nhfb,
  fjournal =     "Communications of the ACM",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J79",
}
@Article{Sojka:2018:IEM,
  author =       "Radim Sojka and David Hor{\'a}k and V{\'a}clav Hapla
                 and Martin Cerm{\'a}k",
  title =        "The impact of enabling multiple subdomains per {MPI}
                 process in the {TFETI} domain decomposition method",
  journal =      j-APPL-MATH-COMP,
  volume =       "319",
  number =       "??",
  pages =        "586--597",
  day =          "15",
  month =        feb,
  year =         "2018",
  CODEN =        "AMHCBQ",
  DOI =          "https://doi.org/10.1016/j.amc.2017.07.031",
  ISSN =         "0096-3003 (print), 1873-5649 (electronic)",
  ISSN-L =       "0096-3003",
  bibdate =      "Wed Nov 15 17:37:14 MST 2017",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/applmathcomput2015.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0096300317304927",
  acknowledgement = ack-nhfb,
  fjournal =     "Applied Mathematics and Computation",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00963003",
}
@Article{Sotiriou-Xanthopoulos:2018:OBV,
  author =       "Efstathios Sotiriou-Xanthopoulos and Leonard Masing
                 and Sotirios Xydis and Kostas Siozios and J{\"u}rgen
                 Becker and Dimitrios Soudris",
  title =        "{OpenCL}-based Virtual Prototyping and Simulation of
                 Many-Accelerator Architectures",
  journal =      j-TECS,
  volume =       "17",
  number =       "5",
  pages =        "86:1--86:??",
  month =        nov,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3242179",
  ISSN =         "1539-9087 (print), 1558-3465 (electronic)",
  ISSN-L =       "1539-9087",
  bibdate =      "Thu Oct 17 18:16:41 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tecs.bib;
                 http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3242179",
  abstract =     "Heterogeneous architectures featuring multiple
                 hardware accelerators have been proposed as a promising
                 solution for meeting the ever-increasing performance
                 and power requirements of embedded systems. However,
                 the existence of numerous design parameters may result
                 in different architectural schemes and thus in extra
                 design effort. To address this issue, OpenCL-based
                 frameworks have been recently utilized for FPGA
                 programming, to enable the portability of a source code
                 to multiple architectures. However, such OpenCL
                 frameworks focus on RTL design, thus not enabling rapid
                 prototyping and abstracted modeling of complex systems.
                 Virtual Prototyping aims to overcome this problem by
                 enabling the system modeling in higher abstraction
                 levels. This article combines the benefits of OpenCL
                 and Virtual Prototyping, by proposing an OpenCL-based
                 prototyping framework for data-parallel
                 many-accelerator systems, which (a) creates a SystemC
                 Virtual Platform from OpenCL, (b) provides a
                 co-simulation environment for the host and the Virtual
                 Platform, (c) offers memory and interconnection models
                 for parallel data processing, and (d) enables the
                 system evaluation with alternative real number
                 representations (e.g., fixed-point or 16-bit
                 floating-point).",
  acknowledgement = ack-nhfb,
  articleno =    "86",
  fjournal =     "ACM Transactions on Embedded Computing Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J840",
}
@Article{Stpiczynski:2018:LBV,
  author =       "Przemys{\l}aw Stpiczy{\'n}ski",
  title =        "Language-based vectorization and parallelization using
                 intrinsics, {OpenMP}, {TBB} and {Cilk Plus}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "74",
  number =       "4",
  pages =        "1461--1472",
  month =        apr,
  year =         "2018",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-017-2231-3",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Thu Oct 10 15:31:11 MDT 2019",
  bibsource =    "http://link.springer.com/journal/11227/74/4;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/content/pdf/10.1007/s11227-017-2231-3.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Tagliavini:2018:UFG,
  author =       "Giuseppe Tagliavini and Daniele Cesarini and Andrea
                 Marongiu",
  title =        "Unleashing Fine-Grained Parallelism on Embedded
                 Many-Core Accelerators with Lightweight {OpenMP}
                 Tasking",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "29",
  number =       "9",
  pages =        "2150--2163",
  month =        sep,
  year =         "2018",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2018.2814602",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Aug 9 10:52:00 MDT 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.computer.org/csdl/trans/td/2018/09/08314096-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Tellez-Velazquez:2018:CSI,
  author =       "Arturo T{\'e}llez-Vel{\'a}zquez and Ra{\'u}l
                 Cruz-Barbosa",
  title =        "A {CUDA}-streams inference machine for non-singleton
                 fuzzy systems",
  journal =      j-CCPE,
  volume =       "30",
  number =       "8",
  pages =        "e4382:1--e4382:??",
  day =          "25",
  month =        apr,
  year =         "2018",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4382",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Aug 4 10:03:13 MDT 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://onlinelibrary.wiley.com/doi/abs/10.1002/cpe.4382",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
}
@Article{Tong:2018:FCM,
  author =       "Zhou Tong and Scott Pakin and Michael Lang and Xin
                 Yuan",
  title =        "Fast classification of {MPI} applications using
                 {Lamport}'s logical clocks",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "120",
  number =       "??",
  pages =        "77--88",
  month =        oct,
  year =         "2018",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2018.05.005",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Fri Aug 10 09:10:45 MDT 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S074373151830340X",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Tracy:2018:CMC,
  author =       "Fred Thomas Tracy and Thomas C. Oppe and Maureen K.
                 Corcoran",
  title =        "A comparison of {MPI} and co-array {FORTRAN} for large
                 finite element variably saturated flow simulations",
  journal =      j-SCPE,
  volume =       "19",
  number =       "4",
  pages =        "423--432",
  month =        "????",
  year =         "2018",
  CODEN =        "????",
  ISSN =         "1895-1767",
  ISSN-L =       "1895-1767",
  bibdate =      "Mon Jan 7 06:46:51 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/scpe.bib",
  URL =          "https://www.scpe.org/index.php/scpe/article/view/1468",
  acknowledgement = ack-nhfb,
  fjournal =     "Scalable Computing: Practice and Experience",
  journal-URL =  "http://www.scpe.org/",
}
@Article{Valero-Lara:2018:CCC,
  author =       "Pedro Valero-Lara and Ivan Mart{\'\i}nez-P{\'e}rez and
                 Ra{\"u}l Sirvent and Xavier Martorell and Antonio J.
                 Pe{\~n}a",
  title =        "{cuThomasBatch} and {cuThomasVBatch}, {CUDA} routines
                 to compute batch of tridiagonal systems on {NVIDIA
                 GPUs}",
  journal =      j-CCPE,
  volume =       "30",
  number =       "24",
  pages =        "e4909:1--e4909:??",
  day =          "25",
  month =        dec,
  year =         "2018",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4909",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Thu Mar 28 08:07:53 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "27 August 2018",
}
@Article{Villaverde:2018:PTI,
  author =       "Alejandro F. Villaverde and Kolja Becker and Julio R.
                 Banga",
  title =        "{PREMER}: a Tool to Infer Biological Networks",
  journal =      j-TCBB,
  volume =       "15",
  number =       "4",
  pages =        "1193--1202",
  month =        jul,
  year =         "2018",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2017.2758786",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Thu Nov 8 06:18:45 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 http://www.math.utah.edu/pub/tex/bib/matlab.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/python.bib;
                 http://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "Inferring the structure of unknown cellular networks
                 is a main challenge in computational biology.
                 Data-driven approaches based on information theory can
                 determine the existence of interactions among network
                 nodes automatically. However, the elucidation of
                 certain features---such as distinguishing between direct
                 and indirect interactions or determining the direction
                 of a causal link---requires estimating
                 information-theoretic quantities in a multidimensional
                 space. This can be a computationally demanding task,
                 which acts as a bottleneck for the application of
                 elaborate algorithms to large-scale network inference
                 problems. The computational cost of such calculations
                 can be alleviated by the use of compiled programs and
                 parallelization. To this end, we have developed PREMER
                 (Parallel Reverse Engineering with Mutual information \&
                 Entropy Reduction), a software toolbox that can run in
                 parallel and sequential environments. It uses
                 information theoretic criteria to recover network
                 topology and determine the strength and causality of
                 interactions, and allows incorporating prior knowledge,
                 imputing missing data, and correcting outliers. PREMER
                 is a free, open source software tool that does not
                 require any commercial software. Its core algorithms
                 are programmed in FORTRAN 90 and implement OpenMP
                 directives. It has user interfaces in Python and
                 MATLAB/Octave, and runs on Windows, Linux, and OSX
                 (https://sites.google.com/site/premertoolbox/).",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}
@Article{Wolfe:2018:MLS,
  author =       "Noah Wolfe and Misbah Mubarak and Christopher D.
                 Carothers and Robert B. Ross and Philip H. Carns",
  title =        "Modeling Large-Scale Slim Fly Networks Using Parallel
                 Discrete-Event Simulation",
  journal =      j-TOMACS,
  volume =       "28",
  number =       "4",
  pages =        "29:1--29:??",
  month =        oct,
  year =         "2018",
  CODEN =        "ATMCEZ",
  DOI =          "https://doi.org/10.1145/3203406",
  ISSN =         "1049-3301 (print), 1558-1195 (electronic)",
  ISSN-L =       "1049-3301",
  bibdate =      "Mon Feb 4 19:19:05 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tomacs.bib",
  abstract =     "As supercomputers approach exascale performance, the
                 increased number of processors translates to an
                 increased demand on the underlying network
                 interconnect. The slim fly network topology, a new
                 low-diameter, low-latency, and low-cost interconnection
                 network, is gaining interest as one possible solution
                 for next-generation supercomputing interconnect
                 systems. In this article, we present a high-fidelity
                 slim fly packet-level model leveraging the Rensselaer
                 Optimistic Simulation System (ROSS) and Co-Design of
                 Exascale Storage (CODES) frameworks. We validate the
                 model with published work before scaling the network
                 size up to an unprecedented 1 million compute nodes and
                 confirming that the slim fly observes peak network
                 throughput at extreme scale. In addition to synthetic
                 workloads, we evaluate large-scale slim fly models with
                 real communication workloads from applications in the
                 Design Forward program with over 110,000 MPI processes.
                 We show strong scaling of the slim fly model on an
                 Intel cluster achieving a peak network packet transfer
                 rate of 2.3 million packets per second and processing
                 over 7 billion discrete events using 128 MPI tasks.
                 Enabled by the strong performance capabilities of the
                 model, we perform a detailed application trace and
                 routing protocol performance study. Through analysis of
                 metrics such as packet latency, hop count, and
                 congestion, we find that the slim fly network is able
                 to leverage simple minimal routing and achieve the same
                 performance as more complex adaptive routing for tested
                 DOE benchmark applications.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Modeling and Computer Simulation",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J781",
}
@Article{Wolfe:2018:ODM,
  author =       "Michael Wolfe and Seyong Lee and Jungwon Kim and
                 Xiaonan Tian and Rengan Xu and Barbara Chapman and
                 Sunita Chandrasekaran",
  title =        "The {OpenACC} data model: Preliminary study on its
                 major challenges and implementations",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "78",
  number =       "??",
  pages =        "15--27",
  month =        oct,
  year =         "2018",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2018.07.003",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Jan 7 15:25:20 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118302175",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Yamazaki:2018:SIL,
  author =       "Ichitaro Yamazaki and Jakub Kurzak and Panruo Wu and
                 Mawussi Zounon and Jack Dongarra",
  title =        "Symmetric Indefinite Linear Solver Using {OpenMP} Task
                 on Multicore Architectures",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "29",
  number =       "8",
  pages =        "1879--1892",
  month =        aug,
  year =         "2018",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2018.2808964",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed Jul 25 09:07:14 MDT 2018",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://www.computer.org/csdl/trans/td/2018/08/08301559-abs.html",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/tpds/archives.htm",
}
@Article{Yviquel:2018:CPU,
  author =       "Herv{\'e} Yviquel and Lauro Cruz and Guido Araujo",
  title =        "Cluster Programming using the {OpenMP} Accelerator
                 Model",
  journal =      j-TACO,
  volume =       "15",
  number =       "3",
  pages =        "35:1--35:??",
  month =        oct,
  year =         "2018",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3226112",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Tue Jan 8 17:19:59 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3226112",
  abstract =     "Computation offloading is a programming model in which
                 program fragments (e.g., hot loops) are annotated so
                 that their execution is performed in dedicated hardware
                 or accelerator devices. Although offloading has been
                 extensively used to move computation to GPUs, through
                 directive-based annotation standards like OpenMP,
                 offloading computation to very large computer clusters
                 can become a complex and cumbersome task. It typically
                 requires mixing programming models (e.g., OpenMP and
                 MPI) and languages (e.g., C/C++ and Scala), dealing
                 with various access control mechanisms from different
                 cloud providers (e.g., AWS and Azure), and integrating
                 all this into a single application. This article
                 introduces computer cluster nodes as simple OpenMP
                 offloading devices that can be used either from a local
                 computer or from the cluster head-node. It proposes a
                 methodology that transforms OpenMP directives to Spark
                 runtime calls with fully integrated communication
                 management, in a way that a cluster appears to the
                 programmer as yet another accelerator device.
                 Experiments using LLVM 3.8, OpenMP 4.5 on well known
                 cloud infrastructures (Microsoft Azure and Amazon EC2)
                 show the viability of the proposed approach, enable a
                 thorough analysis of its performance, and make a
                 comparison with an MPI implementation. The results show
                 that although data transfers can impose overheads,
                 cloud offloading from a local machine can still achieve
                 promising speedups for larger granularity: up to 115$
                 \times $ in 256 cores for the 2MM benchmark using 1GB
                 sparse matrices. In addition, the parallel
                 implementation of a complex and relevant scientific
                 application reveals a 80$ \times $ speedup on a 320
                 core machine when executed directly from the headnode
                 of the cluster.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Zha:2018:LSM,
  author          = {Yue Zha and Jing Li},
  title           = {{Liquid Silicon-Monona}: a Reconfigurable
                     Memory-Oriented Computing Fabric with Scalable
                     Multi-Context Support},
  journal         = j-SIGPLAN,
  volume          = {53},
  number          = {2},
  pages           = {214--228},
  month           = feb,
  year            = {2018},
  CODEN           = {SINODQ},
  DOI             = {https://doi.org/10.1145/3296957.3173167},
  ISSN            = {0362-1340 (print), 1523-2867 (print), 1558-1160
                     (electronic)},
  ISSN-L          = {0362-1340},
  bibdate         = {Wed Oct 16 14:12:56 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/sigplan2010.bib},
  abstract        = {With the recent trend of promoting Field-Programmable
                     Gate Arrays (FPGAs) to first-class citizens in
                     accelerating compute-intensive applications in
                     networking, cloud services and artificial intelligence,
                     FPGAs face two major challenges in sustaining
                     competitive advantages in performance and energy
                     efficiency for diverse cloud workloads: (1) limited
                     configuration capability for supporting light-weight
                     computations/on-chip data storage to accelerate
                     emerging search-/data-intensive applications. (2) lack
                     of architectural support to hide reconfiguration
                     overhead for assisting virtualization in a cloud
                     computing environment. In this paper, we propose a
                     reconfigurable memory-oriented computing fabric, namely
                     Liquid Silicon-Monona (L-Si), enabled by emerging
                     nonvolatile memory technology i.e. RRAM, to address
                     these two challenges. Specifically, L-Si addresses the
                     first challenge by virtue of a new architecture
                     comprising a 2D array of physically identical but
                     functionally-configurable building blocks. It, for the
                     first time, extends the configuration capabilities of
                     existing FPGAs from computation to the whole spectrum
                     ranging from computation to data storage. It allows
                     users to better customize hardware by flexibly
                     partitioning hardware resources between computation and
                     memory, greatly benefiting emerging search- and
                     data-intensive applications. To address the second
                     challenge, L-Si provides scalable multi-context
                     architectural support to minimize reconfiguration
                     overhead for assisting virtualization. In addition, we
                     provide compiler support to facilitate the programming
                     of applications written in high-level programming
                     languages (e.g. OpenCL) and frameworks (e.g.
                     TensorFlow, MapReduce) while fully exploiting the
                     unique architectural capability of L-Si. Our evaluation
                     results show L-Si achieves 99.6\% area reduction, 1.43$
                     \times $ throughput improvement and 94.0\% power
                     reduction on search-intensive benchmarks, as compared
                     with the FPGA baseline. For neural network benchmarks,
                     on average, L-Si achieves 52.3$ \times $ speedup,
                     113.9$ \times $ energy reduction and 81\% area
                     reduction over the FPGA baseline. In addition, the
                     multi-context architecture of L-Si reduces the context
                     switching time to $ \sim $10ns, compared with an
                     off-the-shelf FPGA ($ \sim $100ms), greatly facilitating
                     virtualization.},
  acknowledgement = ack-nhfb,
  fjournal        = {ACM SIGPLAN Notices},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J706},
  remark          = {ASPLOS '18 proceedings.},
}
@Article{Zhang:2018:IRP,
  author          = {Xuechen Zhang and Song Jiang and Alseny Diallo and Lei
                     Wang},
  title           = {{IR+}: Removing parallel {I/O} interference of {MPI}
                     programs via data replication over heterogeneous
                     storage devices},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {76},
  number          = {??},
  pages           = {91--105},
  month           = aug,
  year            = {2018},
  CODEN           = {PACOEJ},
  DOI             = {https://doi.org/10.1016/j.parco.2018.01.004},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Mon Jun 4 07:40:18 MDT 2018},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0167819118300140},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Adam:2019:CRA,
  author          = {Julien Adam and Maxime Kermarquer and Jean-Baptiste
                     Besnard and Leonardo Bautista-Gomez and Marc
                     P{\'e}rache and Patrick Carribault and Julien Jaeger
                     and Allen D. Malony and Sameer Shende},
  title           = {Checkpoint\slash restart approaches for a thread-based
                     {MPI} runtime},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {85},
  number          = {??},
  pages           = {204--219},
  month           = jul,
  year            = {2019},
  CODEN           = {PACOEJ},
  DOI             = {https://doi.org/10.1016/j.parco.2019.02.006},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Mon Oct 14 16:20:01 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0167819118303247},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Al-Shorman:2019:UPP,
  author          = {Mohammad Y. Al-Shorman and Majd M. Al-Kofahi},
  title           = {Ultrasonic pulse propagation simulation using {OpenCL}
                     for environment mapping and discovery},
  journal         = j-IJHPCA,
  volume          = {33},
  number          = {5},
  pages           = {1019--1029},
  day             = {1},
  month           = sep,
  year            = {2019},
  CODEN           = {IHPCFL},
  DOI             = {https://doi.org/10.1177/1094342019846290},
  ISSN            = {1094-3420 (print), 1741-2846 (electronic)},
  ISSN-L          = {1094-3420},
  bibdate         = {Wed Oct 9 14:35:54 MDT 2019},
  bibsource       = {http://hpc.sagepub.com/;
                     http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {https://journals.sagepub.com/doi/full/10.1177/1094342019846290},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of High Performance Computing
                     Applications},
  journal-URL     = {https://journals.sagepub.com/home/hpc},
}
@Article{Awan:2019:OLM,
  author          = {Ammar Ahmad Awan and Karthik Vadambacheri Manian and
                     Ching-Hsiang Chu and Hari Subramoni and Dhabaleswar K.
                     Panda},
  title           = {Optimized large-message broadcast for deep learning
                     workloads: {MPI}, {MPI + NCCL}, or {NCCL2}?},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {85},
  number          = {??},
  pages           = {141--152},
  month           = jul,
  year            = {2019},
  CODEN           = {PACOEJ},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Mon Oct 14 16:20:01 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0167819118303284},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Badia:2019:ASP,
  author          = {Jose M. Bad{\'{\i}}a and Jose A. Belloch and Maximo
                     Cobos and Francisco D. Igual and Enrique S.
                     Quintana-Ort{\'{\i}}},
  title           = {Accelerating the {SRP--PHAT} algorithm on multi- and
                     many-core platforms using {OpenCL}},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {75},
  number          = {3},
  pages           = {1284--1297},
  month           = mar,
  year            = {2019},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1007/s11227-018-2422-6},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Thu Oct 10 15:31:17 MDT 2019},
  bibsource       = {http://link.springer.com/journal/11227/75/3;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
}
@Article{Balaji:2019:SIM,
  author          = {Pavan Balaji and Marc Casas},
  title           = {Special issue on the Message Passing Interface},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {86},
  number          = {??},
  pages           = {14--15},
  month           = aug,
  year            = {2019},
  CODEN           = {PACOEJ},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Mon Oct 14 16:20:01 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S016781911930095X},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Boschetti:2019:MOD,
  author          = {Marco Antonio Boschetti and Vittorio Maniezzo and
                     Francesco Strappaveccia},
  title           = {Membership overlay design optimization with resource
                     constraints (accelerated on {GPU})},
  journal         = j-J-PAR-DIST-COMP,
  volume          = {133},
  number          = {??},
  pages           = {286--296},
  month           = nov,
  year            = {2019},
  CODEN           = {JPDCER},
  ISSN            = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L          = {0743-7315},
  bibdate         = {Fri Sep 13 10:25:21 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0743731518304908},
  acknowledgement = ack-nhfb,
  fjournal        = {Journal of Parallel and Distributed Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Brown:2019:LMR,
  author          = {Nick Brown and Michael Bareford and Mich{\`e}le
                     Weiland},
  title           = {Leveraging {MPI} {RMA} to optimize halo-swapping
                     communications in {MONC} on {Cray} machines},
  journal         = j-CCPE,
  volume          = {31},
  number          = {16},
  pages           = {e5008:1--e5008:??},
  day             = {25},
  month           = aug,
  year            = {2019},
  CODEN           = {CCPEBO},
  DOI             = {https://doi.org/10.1002/cpe.5008},
  ISSN            = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L          = {1532-0626},
  bibdate         = {Sat Oct 12 11:00:04 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/super.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Practice and Experience},
  journal-URL     = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate      = {25 September 2018},
}
@Article{Budiardja:2019:TGO,
  author          = {Reuben D. Budiardja and Christian Y. Cardall},
  title           = {Targeting {GPUs} with {OpenMP} directives on {Summit}:
                     a simple and effective {Fortran} experience},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {88},
  number          = {??},
  pages           = {Article 102544},
  month           = oct,
  year            = {2019},
  CODEN           = {PACOEJ},
  DOI             = {https://doi.org/10.1016/j.parco.2019.102544},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Mon Oct 14 16:20:02 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                     http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0167819119301358},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Cadenelli:2019:CUO,
  author          = {Nicola Cadenelli and Zoran Jak{\v{s}}i{\'c} and
                     Jord{\`a} Polo and David Carrera},
  title           = {Considerations in using {OpenCL} on {GPUs} and {FPGAs}
                     for throughput-oriented genomics workloads},
  journal         = j-FUT-GEN-COMP-SYS,
  volume          = {94},
  number          = {??},
  pages           = {148--159},
  month           = may,
  year            = {2019},
  CODEN           = {FGSEVI},
  ISSN            = {0167-739X (print), 1872-7115 (electronic)},
  ISSN-L          = {0167-739X},
  bibdate         = {Mon Oct 14 16:09:56 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0167739X18314183},
  acknowledgement = ack-nhfb,
  fjournal        = {Future Generation Computer Systems},
  journal-URL     = {http://www.sciencedirect.com/science/journal/0167739X},
}
@Article{Candel:2019:EMC,
  author          = {F. Candel and A. Valero and S. Petit and J.
                     Sahuquillo},
  title           = {Efficient Management of Cache Accesses to Boost
                     {GPGPU} Memory Subsystem Performance},
  journal         = j-IEEE-TRANS-COMPUT,
  volume          = {68},
  number          = {10},
  pages           = {1442--1454},
  month           = oct,
  year            = {2019},
  CODEN           = {ITCOB4},
  DOI             = {https://doi.org/10.1109/TC.2019.2907591},
  ISSN            = {0018-9340 (print), 1557-9956 (electronic)},
  ISSN-L          = {0018-9340},
  bibdate         = {Thu Sep 12 13:33:25 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {IEEE Transactions on Computers},
  journal-URL     = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12},
  keywords        = {Analytical models; Energy consumption; GPU; Graphics
                     processing units; Instruction sets; memory hierarchy;
                     Memory management; miss management; Proposals},
}
@Article{Chen:2019:STG,
  author          = {Yong Chen and Weijia Shang},
  title           = {Supernode transformation on {GPGPUs}},
  journal         = j-INT-J-PAR-EMER-DIST-SYS,
  volume          = {34},
  number          = {2},
  pages           = {181--202},
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1080/17445760.2017.1296147},
  ISSN            = {1744-5760 (print), 1744-5779 (electronic)},
  ISSN-L          = {1744-5760},
  bibdate         = {Tue Sep 10 15:30:02 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/intjparemerdistsys.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.tandfonline.com/toc/gpaa20/34/2},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of Parallel, Emergent and
                     Distributed Systems: IJPEDS},
  journal-URL     = {http://www.tandfonline.com/loi/gpaa20},
  onlinedate      = {06 Apr 2018},
}
@Article{Chikin:2019:MAA,
  author          = {Artem Chikin and Taylor Lloyd and Jos{\'e} Nelson
                     Amaral and Ettore Tiotto and Muhammad Usman},
  title           = {Memory-access-aware Safety and Profitability Analysis
                     for Transformation of Accelerator-bound {OpenMP}
                     Loops},
  journal         = j-TACO,
  volume          = {16},
  number          = {3},
  pages           = {30:1--30:??},
  month           = jul,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1145/3333060},
  ISSN            = {1544-3566 (print), 1544-3973 (electronic)},
  ISSN-L          = {1544-3566},
  bibdate         = {Fri Jul 26 14:25:54 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/taco.bib},
  abstract        = {Iteration Point Difference Analysis is a new static
                     analysis framework that can be used to determine the
                     memory coalescing characteristics of parallel loops
                     that target GPU offloading and to ascertain safety and
                     profitability of loop transformations with the goal of
                     improving their memory access characteristics. This
                     analysis can propagate definitions through control
                     flow, works for non-affine expressions, and is capable
                     of analyzing expressions that reference conditionally
                     defined values. This analysis framework enables safe
                     and profitable loop transformations. Experimental
                     results demonstrate potential for dramatic performance
                     improvements. GPU kernel execution time across the
                     Polybench suite is improved by up to $ 25.5 \times $ on
                     an Nvidia P100 with benchmark overall improvement of up
                     to $ 3.2 \times $. An opportunity detected in a SPEC
                     ACCEL benchmark yields kernel speedup of $ 86.5 \times
                     $ with a benchmark improvement of $ 3.3 \times $. This
                     work also demonstrates how architecture-aware compilers
                     improve code portability and reduce programmer
                     effort.},
  acknowledgement = ack-nhfb,
  articleno       = {30},
  fjournal        = {ACM Transactions on Architecture and Code Optimization
                     (TACO)},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J924},
}
@Article{Ciglaric:2019:OLP,
  author          = {Tadej Ciglari{\v{c}} and Rok {\v{C}}e{\v{s}}novar and
                     Erik {\v{S}}trumbelj},
  title           = {An {OpenCL} library for parallel random number
                     generators},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {75},
  number          = {7},
  pages           = {3866--3881},
  month           = jul,
  year            = {2019},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1007/s11227-019-02756-2},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Thu Oct 10 15:31:20 MDT 2019},
  bibsource       = {http://link.springer.com/journal/11227/75/7;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/prng.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
}
@Article{Clauser:2019:FFO,
  author          = {C. F. Clauser and R. Farengo and H. E. Ferrari},
  title           = {{FOCUS}: a full-orbit {CUDA} solver for particle
                     simulations in magnetized plasmas},
  journal         = j-COMP-PHYS-COMM,
  volume          = {234},
  number          = {??},
  pages           = {126--136},
  month           = jan,
  year            = {2019},
  CODEN           = {CPHCBZ},
  DOI             = {https://doi.org/10.1016/j.cpc.2018.07.018},
  ISSN            = {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L          = {0010-4655},
  bibdate         = {Tue Oct 16 18:11:50 MDT 2018},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0010465518302753},
  acknowledgement = ack-nhfb,
  fjournal        = {Computer Physics Communications},
  journal-URL     = {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Coronado-Barrientos:2019:ANF,
  author          = {E. Coronado-Barrientos and G. Indalecio and A.
                     Garc{\'\i}a-Loureiro},
  title           = {{AXC}: a new format to perform the {SpMV} oriented to
                     {Intel Xeon Phi} architecture in {OpenCL}},
  journal         = j-CCPE,
  volume          = {31},
  number          = {1},
  pages           = {e4864:1--e4864:??},
  day             = {10},
  month           = jan,
  year            = {2019},
  CODEN           = {CCPEBO},
  DOI             = {https://doi.org/10.1002/cpe.4864},
  ISSN            = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L          = {1532-0626},
  bibdate         = {Thu Mar 28 08:07:54 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Practice and Experience},
  journal-URL     = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate      = {31 July 2018},
}
@Article{Crivellini:2019:OPS,
  author          = {Andrea Crivellini and Matteo Franciolini},
  title           = {{OpenMP} Parallelization Strategies for a
                     Discontinuous {Galerkin} Solver},
  journal         = j-INT-J-PARALLEL-PROG,
  volume          = {47},
  number          = {5--6},
  pages           = {838--873},
  month           = dec,
  year            = {2019},
  CODEN           = {IJPPE5},
  DOI             = {https://doi.org/10.1007/s10766-018-0589-3},
  ISSN            = {0885-7458 (print), 1573-7640 (electronic)},
  ISSN-L          = {0885-7458},
  bibdate         = {Sat Jul 25 06:58:52 MDT 2020},
  bibsource       = {http://link.springer.com/journal/10766/47/5;
                     http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {https://link.springer.com/article/10.1007/s10766-018-0589-3},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of Parallel Programming},
  journal-URL     = {http://link.springer.com/journal/10766},
}
@Article{Daberdaku:2019:ACT,
  author          = {Sebastian Daberdaku},
  title           = {Accelerating the computation of triangulated molecular
                     surfaces with {OpenMP}},
  journal         = j-J-SUPERCOMPUTING,
  volume          = {75},
  number          = {7},
  pages           = {3426--3470},
  month           = jul,
  year            = {2019},
  CODEN           = {JOSUED},
  DOI             = {https://doi.org/10.1007/s11227-019-02803-y},
  ISSN            = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L          = {0920-8542},
  bibdate         = {Thu Oct 10 15:31:20 MDT 2019},
  bibsource       = {http://link.springer.com/journal/11227/75/7;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {The Journal of Supercomputing},
  journal-URL     = {http://link.springer.com/journal/11227},
}
@Article{Dalcin:2019:FPM,
  author          = {Lisandro Dalcin and Mikael Mortensen and David E.
                     Keyes},
  title           = {Fast parallel multidimensional {FFT} using advanced
                     {MPI}},
  journal         = j-J-PAR-DIST-COMP,
  volume          = {128},
  number          = {??},
  pages           = {137--150},
  month           = jun,
  year            = {2019},
  CODEN           = {JPDCER},
  DOI             = {https://doi.org/10.1016/j.jpdc.2019.02.006},
  ISSN            = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L          = {0743-7315},
  bibdate         = {Mon May 20 18:06:40 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S074373151830306X},
  acknowledgement = ack-nhfb,
  fjournal        = {Journal of Parallel and Distributed Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Deng:2019:CBV,
  author          = {Y. Deng and T. Li and Y. Luo and X. Zhao},
  title           = {{CUDA}-Based Volume Rendering and Inspection for
                     Time-Varying Ultrasonic Testing Datasets},
  journal         = j-COMPUT-SCI-ENG,
  volume          = {21},
  number          = {5},
  pages           = {76--86},
  month           = sep # {\slash } # oct,
  year            = {2019},
  CODEN           = {CSENFA},
  DOI             = {https://doi.org/10.1109/MCSE.2018.2875319},
  ISSN            = {1521-9615 (print), 1558-366X (electronic)},
  ISSN-L          = {1521-9615},
  bibdate         = {Mon Aug 19 06:40:58 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {See corrections \cite{Deng:2020:CCB}.},
  acknowledgement = ack-nhfb,
  fjournal        = {Computing in Science and Engineering},
  journal-URL     = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992},
  keywords        = {Acoustics; Data visualization; Image color analysis;
                     Real-time systems; Rendering (computer graphics);
                     Three-dimensional displays; Transfer functions},
}
@Article{Denis:2019:SPT,
  author          = {Alexandre Denis and Julien Jaeger and Emmanuel Jeannot
                     and Marc P{\'e}rache and Hugo Taboada},
  title           = {Study on progress threads placement and dedicated
                     cores for overlapping {MPI} nonblocking collectives on
                     manycore processor},
  journal         = j-IJHPCA,
  volume          = {33},
  number          = {6},
  pages           = {1240--1254},
  day             = {1},
  month           = nov,
  year            = {2019},
  CODEN           = {IHPCFL},
  DOI             = {https://doi.org/10.1177/1094342019860184},
  ISSN            = {1094-3420 (print), 1741-2846 (electronic)},
  ISSN-L          = {1094-3420},
  bibdate         = {Wed Oct 9 14:35:54 MDT 2019},
  bibsource       = {http://hpc.sagepub.com/;
                     http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {https://journals.sagepub.com/doi/full/10.1177/1094342019860184},
  acknowledgement = ack-nhfb,
  fjournal        = {International Journal of High Performance Computing
                     Applications},
  journal-URL     = {https://journals.sagepub.com/home/hpc},
}
@Article{Deveci:2019:GMT,
  author          = {M. Deveci and K. D. Devine and K. Pedretti and M. A.
                     Taylor and S. Rajamanickam and {\"U}. V.
                     {\c{C}}ataly{\"u}rek},
  title           = {Geometric Mapping of Tasks to Processors on Parallel
                     Computers with Mesh or Torus Networks},
  journal         = j-IEEE-TRANS-PAR-DIST-SYS,
  volume          = {30},
  number          = {9},
  pages           = {2018--2032},
  month           = sep,
  year            = {2019},
  CODEN           = {ITDSEO},
  DOI             = {https://doi.org/10.1109/TPDS.2019.2900043},
  ISSN            = {1045-9219 (print), 1558-2183 (electronic)},
  ISSN-L          = {1045-9219},
  bibdate         = {Fri Aug 30 06:09:58 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {IEEE Transactions on Parallel and Distributed
                     Systems},
  journal-URL     = {http://www.computer.org/portal/web/csdl/transactions/tpds},
  keywords        = {algorithmic optimizations; application program
                     interfaces; Bandwidth; communication interdependence;
                     contiguous allocation; contiguous block; Cray XK7;
                     E3SM/HOMME; finite difference methods; geometric
                     mapping; geometric partitioning; geometric partitioning
                     algorithm; geometric proximity; IBM BlueGene/Q; jagged
                     partitioning; load balancing; Measurement; mesh
                     networks; message passing; MiniGhost default mapping;
                     MPI tasks; multiprocessing systems; Network topology;
                     noncontiguous allocations; optimisation; parallel
                     applications; parallel computers; parallel machines;
                     Partitioning algorithms; processors; Program
                     processors; recursive bisection; resource allocation;
                     Resource management; sparse allocation; sparse node
                     allocation; spatial partitioning; structured finite
                     difference mini-application; Task analysis; Task
                     mapping; torus networks},
}
@Article{Diaz:2019:AOO,
  author          = {Jose Monsalve Diaz and Kyle Friedline and Swaroop
                     Pophale and Oscar Hernandez and David E. Bernholdt and
                     Sunita Chandrasekaran},
  title           = {Analysis of {OpenMP 4.5} Offloading in
                     Implementations: Correctness and Overhead},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {89},
  number          = {??},
  pages           = {Article 102546},
  month           = nov,
  year            = {2019},
  CODEN           = {PACOEJ},
  DOI             = {https://doi.org/10.1016/j.parco.2019.102546},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Mon Mar 29 11:35:58 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0167819119301371},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Dieguez:2019:TPR,
  author          = {Adri{\'a}n P. Di{\'e}guez and Margarita Amor and
                     Ram{\'o}n Doallo},
  title           = {Tree Partitioning Reduction: A New Parallel Partition
                     Method for Solving Tridiagonal Systems},
  journal         = j-TOMS,
  volume          = {45},
  number          = {3},
  pages           = {31:1--31:26},
  month           = aug,
  year            = {2019},
  CODEN           = {ACMSCU},
  DOI             = {https://doi.org/10.1145/3328731},
  ISSN            = {0098-3500 (print), 1557-7295 (electronic)},
  ISSN-L          = {0098-3500},
  bibdate         = {Tue Sep 3 17:49:22 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/toms.bib},
  URL             = {https://dl.acm.org/citation.cfm?id=3328731},
  abstract        = {Solving tridiagonal linear-equation systems is a
                     fundamental computing kernel in a wide range of
                     scientific and engineering applications, and its
                     computation can be modeled with parallel algorithms.
                     These parallel solvers are typically designed to
                     compute problems whose data fit in a common
                     shared-memory space where all the cores taking part in
                     the computation have access. However, when the problem
                     size is large, data cannot be entirely stored in the
                     common shared-memory space, and a high number of
                     high-latency communications are performed. One
                     alternative is to partition the problem among different
                     memory spaces. At this point, conventional parallel
                     algorithms do not facilitate the partition of
                     computation in independent tiles, since each reduction
                     depends on equations that may be in different tiles.
                     This article proposes an algorithm based on a tree
                     reduction, called the Tree Partitioning Reduction (TPR)
                     method, which partitions the problem into independent
                     slices that can be partially computed in parallel
                     within different common shared-memory spaces. The TPR
                     method can be implemented for any parallel and
                     distributed programming paradigm. Furthermore, in this
                     work, TPR is efficiently implemented for CUDA GPUs to
                     solve large size problems, providing highly competitive
                     performance results with respect to existing packages,
                     being, on average, 22.03$ \times $ faster than
                     CUSPARSE.},
  acknowledgement = ack-nhfb,
  articleno       = {31},
  fjournal        = {ACM Transactions on Mathematical Software (TOMS)},
  journal-URL     = {http://dl.acm.org/pub.cfm?id=J782},
}
@Article{Diep:2019:TSS,
  author          = {Thanh-Dang Diep and Kien Trung Pham and Karl
                     F{\"u}rlinger and Nam Thoai},
  title           = {A time-stamping system to detect memory consistency
                     errors in {MPI} one-sided applications},
  journal         = j-PARALLEL-COMPUTING,
  volume          = {86},
  number          = {??},
  pages           = {36--44},
  month           = aug,
  year            = {2019},
  CODEN           = {PACOEJ},
  ISSN            = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L          = {0167-8191},
  bibdate         = {Mon Oct 14 16:20:01 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S0167819118303235},
  acknowledgement = ack-nhfb,
  fjournal        = {Parallel Computing},
  journal-URL     = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Dongarra:2019:PPL,
  author          = {Jack Dongarra and Mark Gates and Azzam Haidar and
                     Jakub Kurzak and Piotr Luszczek and Panruo Wu and
                     Ichitaro Yamazaki and Asim Yarkhan and Maksims
                     Abalenkovs and Negin Bagherpour and Sven Hammarling and
                     Jakub S{\'\i}stek and David Stevens and Mawussi Zounon
                     and Samuel D. Relton},
  title           = {{PLASMA}: Parallel Linear Algebra Software for
                     Multicore Using {OpenMP}},
  journal         = j-TOMS,
  volume          = {45},
  number          = {2},
  pages           = {16:1--16:35},
  month           = apr,
  year            = {2019},
  CODEN           = {ACMSCU},
  DOI             = {https://doi.org/10.1145/3264491},
  ISSN            = {0098-3500 (print), 1557-7295 (electronic)},
  ISSN-L          = {0098-3500},
  bibdate         = {Mon May 6 18:23:42 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/toms.bib},
  URL             = {https://dl.acm.org/citation.cfm?id=3264491},
  abstract        = {The recent version of the Parallel Linear Algebra
                     Software for Multicore Architectures (PLASMA) library
                     is based on tasks with dependencies from the OpenMP
                     standard. The main functionality of the library is
                     presented. Extensive benchmarks are targeted on three
                     recent multicore and manycore architectures, namely, an
                     Intel Xeon, Intel Xeon Phi, and IBM POWER 8
                     processors.},
  acknowledgement = ack-nhfb,
  articleno       = {16},
  fjournal        = {ACM Transactions on Mathematical Software (TOMS)},
  journal-URL     = {http://dl.acm.org/pub.cfm?id=J782},
}
@Article{Doulis:2019:CMP,
  author          = {Georgios Doulis and J{\"o}rg Frauendiener and Chris
                     Stevens and Ben Whale},
  title           = {{COFFEE} --- an {MPI}-parallelized {Python} package
                     for the numerical evolution of differential equations},
  journal         = j-SOFTWAREX,
  volume          = {10},
  number          = {??},
  pages           = {Article 100283},
  month           = jul # {\slash } # dec,
  year            = {2019},
  CODEN           = {????},
  DOI             = {https://doi.org/10.1016/j.softx.2019.100283},
  ISSN            = {2352-7110},
  ISSN-L          = {2352-7110},
  bibdate         = {Fri Apr 9 16:04:36 MDT 2021},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/python.bib;
                     http://www.math.utah.edu/pub/tex/bib/softwarex.bib},
  URL             = {http://www.sciencedirect.com/science/article/pii/S2352711019300950},
  acknowledgement = ack-nhfb,
  fjournal        = {SoftwareX},
  journal-URL     = {https://www.sciencedirect.com/journal/softwarex/issues},
}
@Article{Edmonds:2019:HAS,
  author          = {Mark Edmonds and Tanvir Atahary and Scott Douglass and
                     Tarek Taha},
  title           = {Hardware Accelerated Semantic Declarative Memory
                     Systems through {CUDA} and {MapReduce}},
  journal         = j-IEEE-TRANS-PAR-DIST-SYS,
  volume          = {30},
  number          = {3},
  pages           = {601--614},
  month           = mar,
  year            = {2019},
  CODEN           = {ITDSEO},
  DOI             = {https://doi.org/10.1109/TPDS.2018.2866848},
  ISSN            = {1045-9219 (print), 1558-2183 (electronic)},
  ISSN-L          = {1045-9219},
  bibdate         = {Thu Feb 14 06:19:14 MST 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {https://www.computer.org/csdl/trans/td/2019/03/08444694-abs.html},
  acknowledgement = ack-nhfb,
  fjournal        = {IEEE Transactions on Parallel and Distributed
                     Systems},
  journal-URL     = {http://www.computer.org/tpds/archives.htm},
}
@Article{Faict:2019:MGI,
  author          = {Thomas Faict and Erik H. D'Hollander and Bart
                     Goossens},
  title           = {Mapping a Guided Image Filter on the {HARP}
                     Reconfigurable Architecture Using {OpenCL}},
  journal         = j-ALGORITHMS-BASEL,
  volume          = {12},
  number          = {8},
  pages           = {149:1--149:??},
  month           = aug,
  year            = {2019},
  CODEN           = {ALGOCH},
  DOI             = {https://doi.org/10.3390/a12080149},
  ISSN            = {1999-4893 (electronic)},
  ISSN-L          = {1999-4893},
  bibdate         = {Thu May 28 08:40:45 MDT 2020},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/algorithms.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL             = {https://www.mdpi.com/1999-4893/12/8/149},
  acknowledgement = ack-nhfb,
  articleno       = {149},
  fjournal        = {Algorithms (Basel)},
  journal-URL     = {https://www.mdpi.com/},
  pagecount       = {??},
}
@Article{Fan:2019:BPA,
  author          = {Xing Fan and Oliver Sinnen and Nasser Giacaman},
  title           = {Balancing parallelization and asynchronization in
                     event-driven programs with {OpenMP}},
  journal         = j-CCPE,
  volume          = {31},
  number          = {4},
  pages           = {e4959:1--e4959:??},
  day             = {25},
  month           = feb,
  year            = {2019},
  CODEN           = {CCPEBO},
  DOI             = {https://doi.org/10.1002/cpe.4959},
  ISSN            = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L          = {1532-0626},
  bibdate         = {Thu Mar 28 08:07:55 MDT 2019},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal        = {Concurrency and Computation: Practice and Experience},
  journal-URL     = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate      = {21 September 2018},
}
@Article{Fan:2019:SAO,
  author =       "Xing Fan and Oliver Sinnen and Nasser Giacaman",
  title =        "Supporting asynchronization in {OpenMP} for
                 event-driven programming",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "82",
  number =       "??",
  pages =        "57--74",
  month =        "????",
  year =         "2019",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2018.03.008",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Tue Mar 12 06:47:09 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118300838",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Figueiredo:2019:MOP,
  author =       "Marco Antonio C. de {Figueiredo, Jr.} and Edans F. de
                 Oliveira Sandes and Genaina N. Rodrigues and George L.
                 M. Teodoro and Alba Cristina M. A. de Melo",
  title =        "{MASA-OpenCL}: Parallel pruned comparison of long
                 {DNA} sequences with {OpenCL}",
  journal =      j-CCPE,
  volume =       "31",
  number =       "11",
  pages =        "e5039:1--e5039:??",
  day =          "10",
  month =        jun,
  year =         "2019",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.5039",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Oct 12 11:00:02 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "21 October 2018",
}
@Article{Fujita:2019:EIM,
  author =       "Hajime Fujita and Chongxiao Cao and Sayantan Sur and
                 Charles Archer and Erik Paulson and Maria Garzaran",
  title =        "Efficient implementation of {MPI-3} {RMA} over
                 {openFabrics} interfaces",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "87",
  number =       "??",
  pages =        "1--10",
  month =        sep,
  year =         "2019",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2019.04.008",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:02 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303843",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Gittens:2019:AAS,
  author =       "Alex Gittens and Kai Rothauge and Shusen Wang and
                 Michael W. Mahoney and Jey Kottalam and Lisa Gerhardt
                 and Prabhat and Michael Ringenburg and Kristyn
                 Maschhoff",
  title =        "{Alchemist}: an {Apache Spark} $ \leftrightarrow $
                 {MPI} interface",
  journal =      j-CCPE,
  volume =       "31",
  number =       "16",
  pages =        "e5026:1--e5026:??",
  day =          "25",
  month =        aug,
  year =         "2019",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.5026",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Sat Oct 12 11:00:04 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "12 November 2018",
}
@Article{Gloster:2019:CBP,
author = "Andrew Gloster and Lennon {{\'O} N{\'a}raigh} and Khang
Ee Pang",
title = "{cuPentBatch} --- a batched pentadiagonal solver for
{NVIDIA} {GPUs}",
journal = j-COMP-PHYS-COMM,
volume = "241",
number = "??",
pages = "113--121",
month = aug,
year = "2019",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/j.cpc.2019.03.016",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Tue May 14 10:01:33 MDT 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465519300979",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Gloster:2019:CCF,
  author =       "Andrew Gloster and Lennon {{\'O} N{\'a}raigh}",
  title =        "{cuSten} --- {CUDA} finite difference and stencil
                 library",
  journal =      j-SOFTWAREX,
  volume =       "10",
  number =       "??",
  pages =        "Article 100337",
  month =        jul # "\slash " # dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1016/j.softx.2019.100337",
  ISSN =         "2352-7110",
  ISSN-L =       "2352-7110",
  bibdate =      "Fri Apr 9 16:04:36 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/softwarex.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S2352711019300561",
  acknowledgement = ack-nhfb,
  fjournal =     "SoftwareX",
  journal-URL =  "https://www.sciencedirect.com/journal/softwarex/issues",
}
@Article{Gropp:2019:GEI,
  author =       "William Gropp and Rajeev Thakur",
  title =        "{Guest Editor}'s introduction: Special issue on best
                 papers from {EuroMPI\slash USA 2017}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "84",
  number =       "??",
  pages =        "62--62",
  month =        may,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819119300560",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Gropp:2019:UNS,
  author =       "William D. Gropp",
  title =        "Using node and socket information to implement {MPI}
                 {Cartesian} topologies",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "85",
  number =       "??",
  pages =        "98--108",
  month =        jul,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303156",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Gueunet:2019:TBA,
  author =       "C. Gueunet and P. Fortin and J. Jomier and J. Tierny",
  title =        "Task-Based Augmented Contour Trees with {Fibonacci}
                 Heaps",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "30",
  number =       "8",
  pages =        "1889--1905",
  month =        aug,
  year =         "2019",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2898436",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Fri Aug 30 06:09:58 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fibquart.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "computation procedure; contour tree based
                 applications; Data analysis; data segmentation
                 applications; data structures; Data structures; data
                 visualisation; Data visualization; fast shared memory;
                 Fibonacci heaps; independent local tasks; intermediate
                 data structures; join split trees; multi-core
                 architecture; multi-threading; multicore computation;
                 OpenMP task runtime; parallel algorithm; parallel
                 algorithms; Parallel algorithms; parallel thanks;
                 Runtime; Scientific visualization; Task analysis; task
                 parallelism; task-based augmented contour trees;
                 topological data analysis; tree algorithm; trees
                 (mathematics)",
}
@Article{Hajihassani:2019:FAI,
  author =       "O. Hajihassani and S. K. Monfared and S. H. Khasteh
                 and S. Gorgin",
  title =        "Fast {AES} Implementation: A High-Throughput Bitsliced
                 Approach",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "30",
  number =       "10",
  pages =        "2211--2222",
  month =        oct,
  year =         "2019",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2911278",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Dec 19 09:20:35 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "AES; byte-wise operations; computing process;
                 cryptography; CTR; CUDA; CUDA-enabled GPU; Data models;
                 data representation; data representation scheme; ECB;
                 Encryption; encryption throughput; fast AES
                 implementation; GPU; Graphics processing units;
                 high-performance; high-throughput bitsliced AES
                 implementation; high-throughput bitsliced approach;
                 logic circuits; parallel architectures; parallelization
                 capability; parallelization unit; S-box logic circuit;
                 ShiftRows; Standards; substitute bytes stage; Table
                 lookup; Throughput",
}
@Article{Hermanns:2019:MEI,
  author =       "Marc-Andr{\'e} Hermanns and Nathan T. Hjelm and
                 Michael Knobloch and Kathryn Mohror and Martin Schulz",
  title =        "The {MPI\_T} events interface: an early evaluation and
                 overview of the interface",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "85",
  number =       "??",
  pages =        "119--130",
  month =        jul,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303314",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Holmes:2019:PPE,
  author =       "Daniel J. Holmes and Bradley Morgan and Anthony
                 Skjellum and Purushotham V. Bangalore and Srinivas
                 Sridharan",
  title =        "Planning for performance: Enhancing achievable
                 performance for {MPI} through persistent collective
                 operations",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "81",
  number =       "??",
  pages =        "32--57",
  month =        jan,
  year =         "2019",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2018.08.001",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Jan 7 15:25:21 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118302412",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Huckelheim:2019:RMA,
  author =       "Jan H{\"u}ckelheim and Paul Hovland and Michelle Mills
                 Strout and Jens-Dominik M{\"u}ller",
  title =        "Reverse-mode algorithmic differentiation of an
                 {OpenMP}-parallel compressible flow solver",
  journal =      j-IJHPCA,
  volume =       "33",
  number =       "1",
  pages =        "140--154",
  day =          "1",
  month =        jan,
  year =         "2019",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342017712060",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Wed Oct 9 14:35:53 MDT 2019",
  bibsource =    "http://hpc.sagepub.com/;
                 http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://journals.sagepub.com/doi/full/10.1177/1094342017712060",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
}
@Article{Izadpanah:2019:PAP,
  author =       "Ramin Izadpanah and Benjamin A. Allan and Damian
                 Dechev and Jim Brandt",
  title =        "Production Application Performance Data Streaming for
                 System Monitoring",
  journal =      j-TOMPECS,
  volume =       "4",
  number =       "2",
  pages =        "8:1--8:??",
  month =        jun,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3319498",
  ISSN =         "2376-3639",
  bibdate =      "Sat Sep 21 07:21:17 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tompecs.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3319498",
  abstract =     "In this article, we present an approach to streaming
                 collection of application performance data. Practical
                 application performance tuning and troubleshooting in
                 production high-performance computing (HPC)
                 environments requires an understanding of how
                 applications interact with the platform, including (but
                 not limited to) parallel programming libraries such as
                 Message Passing Interface (MPI). Several profiling and
                 tracing tools exist that collect heavy runtime data
                 traces either in memory (released only at application
                 exit) or on a file system (imposing an I/O load that
                 may interfere with the performance being measured).
                 Although these approaches are beneficial in development
                 stages and post-run analysis, a systemwide and
                 low-overhead method is required to monitor deployed
                 applications continuously. This method must be able to
                 collect information at both the application and system
                 levels to yield a complete performance picture. In our
                 approach, an application profiler collects application
                 event counters. A sampler uses an efficient
                 inter-process communication method to periodically
                 extract the application counters and stream them into
                 an infrastructure for performance data collection. We
                 implement a tool-set based on our approach and
                 integrate it with the Lightweight Distributed Metric
                 Service (LDMS) system, a monitoring system used on
                 large-scale computational platforms. LDMS provides the
                 infrastructure to create and gather streams of
                 performance data in a low overhead manner. We
                 demonstrate our approach using applications implemented
                 with MPI, as it is one of the most common standards for
                 the development of large-scale scientific applications.
                 We utilize our tool-set to study the impact of our
                 approach on an open source HPC application, Nalu. Our
                 tool-set enables us to efficiently identify patterns in
                 the behavior of the application without source-level
                 knowledge. We leverage LDMS to collect system-level
                 performance data and explore the correlation between
                 the system and application events. Also, we demonstrate
                 how our tool-set can help detect anomalies with a low
                 latency. We run tests on two different architectures: a
                 system enabled with Intel Xeon Phi and another system
                 equipped with Intel Xeon processor. Our overhead study
                 shows our method imposes at most 0.5\% CPU usage
                 overhead on the application in realistic deployment
                 scenarios.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Modeling and Performance
                 Evaluation of Computing Systems (TOMPECS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J1525",
}
@Article{Kallenborn:2019:MPC,
  author =       "Felix Kallenborn and Christian Hundt and Sebastian
                 B{\"o}ser and Bertil Schmidt",
  title =        "Massively parallel computation of atmospheric neutrino
                 oscillations on {CUDA}-enabled accelerators",
  journal =      j-COMP-PHYS-COMM,
  volume =       "234",
  number =       "??",
  pages =        "235--244",
  month =        jan,
  year =         "2019",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2018.07.022",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Tue Oct 16 18:11:50 MDT 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465518302790",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Kang:2019:SAM,
  author =       "Qiao Kang and Jesper Larsson Tr{\"a}ff and Reda
                 Al-Bahrani and Ankit Agrawal and Alok Choudhary and
                 Wei-keng Liao",
  title =        "Scalable Algorithms for {MPI} Intergroup {Allgather}
                 and {Allgatherv}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "85",
  number =       "??",
  pages =        "220--230",
  month =        jul,
  year =         "2019",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2019.04.015",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S016781911830320X",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Knap:2019:PEU,
  author =       "Marcin Knap and Pawe{\l} Czarnul",
  title =        "Performance evaluation of Unified Memory with
                 prefetching and oversubscription for selected parallel
                 {CUDA} applications on {NVIDIA} {Pascal} and {Volta}
                 {GPUs}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "75",
  number =       "11",
  pages =        "7625--7645",
  month =        nov,
  year =         "2019",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-019-02966-8",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Sat Jul 25 07:17:51 MDT 2020",
  bibsource =    "http://link.springer.com/journal/11227/75/11;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://link.springer.com/content/pdf/10.1007/s11227-019-02966-8.pdf",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Knight:2019:TES,
  author =       "Louise Knight and Polona Stefanic and Matej Cigale and
                 Andrew C. Jones and Ian Taylor",
  title =        "Towards extending the {SWITCH} platform for
                 time-critical, cloud-based {CUDA} applications: Job
                 scheduling parameters influencing performance",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "100",
  number =       "??",
  pages =        "542--556",
  month =        nov,
  year =         "2019",
  CODEN =        "FGSEVI",
  DOI =          "https://doi.org/10.1016/j.future.2019.05.039",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Mon Feb 10 12:55:01 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167739X18311014",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Kronbichler:2019:FMF,
  author =       "Martin Kronbichler and Katharina Kormann",
  title =        "Fast Matrix-Free Evaluation of Discontinuous
                 {Galerkin} Finite Element Operators",
  journal =      j-TOMS,
  volume =       "45",
  number =       "3",
  pages =        "29:1--29:40",
  month =        aug,
  year =         "2019",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/3325864",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Tue Sep 3 17:49:22 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3325864",
  abstract =     "We present an algorithmic framework for matrix-free
                 evaluation of discontinuous Galerkin finite element
                 operators. It relies on fast quadrature with sum
                 factorization on quadrilateral and hexahedral meshes,
                 targeting general weak forms of linear and nonlinear
                 partial differential equations. Different algorithms
                 and data structures are compared in an in-depth
                 performance analysis. The implementations of the local
                 integrals are optimized by vectorization over several
                 cells and faces and an even-odd decomposition of the
                 one-dimensional interpolations. Up to 60\% of the
                 arithmetic peak on Intel Haswell, Broadwell, and
                 Knights Landing processors is reached when running from
                 caches and up to 40\% of peak when also considering the
                 access to vectors from main memory. On 2$ \times $14
                 Broadwell cores, the throughput is up to 2.2 billion
                 unknowns per second for the 3D Laplacian and up to 4
                 billion unknowns per second for the 3D advection on
                 affine geometries, close to a simple copy operation at
                 4.7 billion unknowns per second. Our experiments show
                 that MPI ghost exchange has a considerable impact on
                 performance and we present strategies to mitigate this
                 effect. Finally, various options for evaluating
                 geometry terms and their performance are discussed. Our
                 implementations are publicly available through the
                 deal.II finite element library.",
  acknowledgement = ack-nhfb,
  articleno =    "29",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Kumar:2019:FOP,
  author =       "Ramavarmaraja Kishor Kumar and Vladimir Loncar and
                 Paulsamy Muruganandam and Sadhan K. Adhikari and Antun
                 Balaz",
  title =        "{C} and {Fortran} {OpenMP} programs for rotating
                 {Bose--Einstein} condensates",
  journal =      j-COMP-PHYS-COMM,
  volume =       "240",
  number =       "??",
  pages =        "74--82",
  month =        jul,
  year =         "2019",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2019.03.004",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Fri Jun 14 08:12:51 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465519300827",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@TechReport{Laguna:2019:GPD,
  author =       "Ignacio Laguna and Paul C. Wood and Ranvijay Singh and
                 Saurabh Bagchi",
  title =        "{GPUMixer}: Performance-Driven Floating-Point Tuning
                 for {GPU} Scientific Applications",
  type =         "Report",
  institution =  "Lawrence Livermore National Laboratory",
  address =      "Livermore CA 94550, USA",
  year =         "2019",
  bibdate =      "Tue Aug 06 05:54:23 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                 http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://lagunaresearch.org/docs/isc-2019.pdf;
                 https://www.hpcwire.com/2019/08/05/llnl-purdue-researchers-harness-gpu-mixed-precision-for-accuracy-performance-tradeoff/",
  abstract =     "We present GPUMixer, a tool to perform mixed-precision
                 floating-point tuning on scientific GPU applications.
                 While precision tuning techniques are available, they
                 are designed for serial programs and are
                 accuracy-driven, i.e., they consider configurations
                 that satisfy accuracy constraints, but these
                 configurations may degrade performance. GPUMixer, in
                 contrast, presents a performance-driven approach for
                 tuning. We introduce a novel static analysis that finds
                 Fast Imprecise Sets (FISets), sets of operations on low
                 precision that minimize type conversions, which often
                 yield performance speedups. To estimate the relative
                 error introduced by GPU mixed-precision, we propose
                 shadow computations analysis for GPUs, the first of
                 this class for multi-threaded applications. GPUMixer
                 obtains performance improvements of up to 46.4\% of the
                 ideal speedup in comparison to only 20.7\% found by
                 state-of-the-art methods.",
  acknowledgement = ack-nhfb,
  remark =       "Best paper award at the 33rd ISC High Performance
                 conference held June 16--20, 2019.",
}
@Article{Levy:2019:USE,
  author =       "Scott Levy and Kurt B. Ferreira and Whit Schonbein and
                 Ryan E. Grant and Matthew G. F. Dosanjh",
  title =        "Using simulation to examine the effect of {MPI}
                 message matching costs on application performance",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "84",
  number =       "??",
  pages =        "63--74",
  month =        may,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303272",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Li:2019:TBH,
  author =       "Bing Li and Mengjie Mao and Xiaoxiao Liu and Tao Liu
                 and Zihao Liu and Wujie Wen and Yiran Chen and Hai
                 (Helen) Li",
  title =        "Thread Batching for High-performance Energy-efficient
                 {GPU} Memory Design",
  journal =      j-JETC,
  volume =       "15",
  number =       "4",
  pages =        "39:1--39:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3330152",
  ISSN =         "1550-4832",
  bibdate =      "Tue Dec 17 07:50:24 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jetc.bib;
                 http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3330152",
  abstract =     "Massive multi-threading in GPU imposes tremendous
                 pressure on memory subsystems. Due to rapid growth in
                 thread-level parallelism of GPU and slowly improved
                 peak memory bandwidth, memory becomes a bottleneck of
                 GPU's performance and energy efficiency. In this
                 article, we propose an integrated architectural scheme
                 to optimize the memory accesses and therefore boost the
                 performance and energy efficiency of GPU. First, we
                 propose a thread batch enabled memory partitioning
                 (TEMP) to improve GPU memory access parallelism. In
                 particular, TEMP groups multiple thread blocks that
                 share the same set of pages into a thread batch and
                 applies a page coloring mechanism to bound each stream
                 multiprocessor (SM) to the dedicated memory banks.
                 After that, TEMP dispatches the thread batch to an SM
                 to ensure high-parallel memory-access streaming from
                 the different thread blocks. Second, a thread
                 batch-aware scheduling (TBAS) scheme is introduced to
                 improve the GPU memory access locality and to reduce
                 the contention on memory controllers and
                 interconnection networks. Experimental results show
                 that the integration of TEMP and TBAS can achieve up to
                 10.3\% performance improvement and 11.3\% DRAM energy
                 reduction across diverse GPU applications. We also
                 evaluate the performance interference of the mixed
                 CPU+GPU workloads when they are run on a heterogeneous
                 system that employs our proposed schemes. Our results
                 show that a simple solution can effectively ensure the
                 efficient execution of both GPU and CPU applications.",
  acknowledgement = ack-nhfb,
  articleno =    "39",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}
@Article{Lima:2019:PEA,
  author =       "Jo{\~a}o Vicente Ferreira Lima and Issam Ra{\"\i}s and
                 Laurent Lef{\`e}vre and Thierry Gautier",
  title =        "Performance and energy analysis of {OpenMP} runtime
                 systems with dense linear algebra algorithms",
  journal =      j-IJHPCA,
  volume =       "33",
  number =       "3",
  pages =        "431--443",
  day =          "1",
  month =        may,
  year =         "2019",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342018792079",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Wed Oct 9 14:35:53 MDT 2019",
  bibsource =    "http://hpc.sagepub.com/;
                 http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://journals.sagepub.com/doi/full/10.1177/1094342018792079",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
}
@Article{Liu:2019:MML,
author = "Qixiao Liu and Zhifeng Chen and Zhibin Yu",
title = "{MiC}: Multi-level Characterization and Optimization
of {GPGPU} Kernels",
journal = j-JETC,
volume = "15",
number = "3",
pages = "25:1--25:??",
month = jun,
year = "2019",
CODEN = "????",
DOI = "https://doi.org/10.1145/3304108",
ISSN = "1550-4832",
bibdate = "Fri Nov 29 16:06:01 MST 2019",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jetc.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3304108",
abstract = "Graphics processing units (GPUs) have enjoyed
increasing popularity in recent years, which benefits
from, for example, general-purpose GPU (GPGPU) for
parallel programs and new computing paradigms, such as
the Internet of Things (IoT). GPUs hold great potential
in providing effective solutions for big data analytics
while the demands for processing large quantities of
data in real time are also increasing. However, the
pervasive presence of GPUs on mobile devices presents
great challenges for GPGPU, mainly because GPGPU
integrates a large amount of processor arrays and
concurrent executing threads (up to hundreds of
thousands). In particular, the root causes of
performance loss in a GPGPU program can not be revealed
in detail by current approaches. In this article, we
propose MiC (Multi-level Characterization), a framework
that comprehensively characterizes GPGPU kernels at the
instruction, Basic Block (BBL), and thread levels.
Specifically, we devise Instruction Vectors (IV) and
Basic Blocks Vectors (BBV), a Thread Similarity Matrix
(TSM), and a Divergence Flow Statistics Graph (DFSG) to
profile information in each level. We use MiC to
provide insights into GPGPU kernels through the
characterizations of 34 kernels from popular GPGPU
benchmark suites such as Compute Unified Device
Architecture (CUDA) Software Development Kit (SDK),
Rodinia, and Parboil. In comparison with Central
Processing Unit (CPU) workloads, we conclude the key
findings as follows: (1) There are comparable
Instruction-Level Parallelism (ILP); (2) The BBL count
is significantly smaller than CPU workloads---only 22.8
on average; (3) The dynamic instruction count per
thread varies from dozens to tens of thousands and it
is extremely small compared to CPU benchmarks; (4) The
Pareto principle (also called 90/10 rule) does not
apply to GPGPU kernels while it pervasively exists in
CPU programs; (5) The loop patterns are dramatically
different from those in CPU workloads; (6) The branch
ratio is lower than that of CPU programs but higher
than pure GPU workloads. In addition, we have also
shown how TSM and DFSG are used to characterize the
branch divergence in a visual way, to enable the
analysis of thread behavior in GPGPU programs. In
addition, we show an optimization case for a GPGPU
kernel from the bottleneck identified through its
characterization result, which improves 16.8\%
performance.",
acknowledgement = ack-nhfb,
articleno = "25",
fjournal = "ACM Journal on Emerging Technologies in Computing
Systems (JETC)",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J967",
}
@Article{Lopes:2019:FBD,
  author =       "Paulo A. C. Lopes and Satyendra Singh Yadav and
                 Aleksandar Ilic and Sarat Kumar Patra",
  title =        "Fast block distributed {CUDA} implementation of the
                 {Hungarian} algorithm",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "130",
  number =       "??",
  pages =        "50--62",
  month =        aug,
  year =         "2019",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2019.03.014",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Mon May 20 18:06:40 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731519302254",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Lopez-Gomez:2019:ESP,
  author =       "Javier L{\'o}pez-G{\'o}mez and Javier Fern{\'a}ndez
                 Mu{\~n}oz and David del Rio Astorga and Manuel F. Dolz
                 and J. Daniel Garcia",
  title =        "Exploring stream parallel patterns in distributed
                 {MPI} environments",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "84",
  number =       "??",
  pages =        "24--36",
  month =        may,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303442",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Lorenzon:2019:ASO,
  author          = "A. F. Lorenzon and C. C. {de Oliveira} and J. D. Souza
                     and A. C. S. Beck",
  title           = "{Aurora}: Seamless Optimization of {OpenMP}
                     Applications",
  journal         = j-IEEE-TRANS-PAR-DIST-SYS,
  volume          = "30",
  number          = "5",
  pages           = "1007--1021",
  month           = may,
  year            = "2019",
  CODEN           = "ITDSEO",
  DOI             = "https://doi.org/10.1109/TPDS.2018.2872992",
  ISSN            = "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L          = "1045-9219",
  bibdate         = "Fri Aug 30 06:09:58 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "IEEE Transactions on Parallel and Distributed
                     Systems",
  journal-URL     = "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords        = "application program interfaces; Aurora; code
                     transformation; efficiency 91.0 percent; Energy-Delay
                     Product; Feedback-Driven Threading; Hardware;
                     Instruction sets; Ion radiation effects; Magnetosphere;
                     message passing; Message systems; Microarchitecture;
                     multicore processors; multiprocessing systems; OpenMP;
                     OpenMP applications; OpenMP feature; OpenMP framework;
                     optimization; parallel applications; parallel loop
                     region; parallel processing; Runtime; runtime
                     environments; seamless optimization; software
                     developers; standard OpenMP execution; thread-level
                     parallelism; Thread-level parallelism exploitation",
}
@Article{Losada:2019:LRR,
  author          = "Nuria Losada and George Bosilca and Aur{\'e}lien
                     Bouteiller and Patricia Gonz{\'a}lez and Mar{\'\i}a J.
                     Mart{\'\i}n",
  title           = "Local rollback for resilient {MPI} applications with
                     application-level checkpointing and message logging",
  journal         = j-FUT-GEN-COMP-SYS,
  volume          = "91",
  number          = "??",
  pages           = "450--464",
  month           = feb,
  year            = "2019",
  CODEN           = "FGSEVI",
  ISSN            = "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L          = "0167-739X",
  bibdate         = "Tue Feb 5 08:15:51 MST 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "https://www.sciencedirect.com/science/article/pii/S0167739X18303443",
  acknowledgement = ack-nhfb,
  fjournal        = "Future Generation Computer Systems",
  journal-URL     = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Lu:2019:PMM,
  author          = "Gangzhao Lu and Weizhe Zhang and Hui He and Laurence
                     T. Yang",
  title           = "Performance modeling for {MPI} applications with low
                     overhead fine-grained profiling",
  journal         = j-FUT-GEN-COMP-SYS,
  volume          = "90",
  number          = "??",
  pages           = "317--326",
  month           = jan,
  year            = "2019",
  CODEN           = "FGSEVI",
  DOI             = "https://doi.org/10.1016/j.future.2018.08.018",
  ISSN            = "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L          = "0167-739X",
  bibdate         = "Tue Sep 18 14:07:59 MDT 2018",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0167739X18308252",
  acknowledgement = ack-nhfb,
  fjournal        = "Future Generation Computer Systems",
  journal-URL     = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Mercan:2019:CCH,
  author          = "H. Mercan and C. Yilmaz and K. Kaya",
  title           = "{CHiP}: A Configurable Hybrid Parallel Covering Array
                     Constructor",
  journal         = j-IEEE-TRANS-SOFTW-ENG,
  volume          = "45",
  number          = "12",
  pages           = "1270--1291",
  month           = dec,
  year            = "2019",
  CODEN           = "IESEDJ",
  DOI             = "https://doi.org/10.1109/TSE.2018.2837759",
  ISSN            = "0098-5589 (print), 1939-3520 (electronic)",
  ISSN-L          = "0098-5589",
  bibdate         = "Thu Dec 12 06:35:49 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/ieeetranssoftweng2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "IEEE Transactions on Software Engineering",
  journal-URL     = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=32",
  keywords        = "Benchmark testing; constraint satisfaction problem;
                     Covering arrays; CUDA; Graphics processing units;
                     graphics processing units; metaheuristic search;
                     parallel computing; Parallel processing; Scalability;
                     Simulated annealing; Upper bound",
}
@Article{Mironov:2019:EMO,
  author          = "Vladimir Mironov and Alexander Moskovsky and Michael
                     D'Mello and Yuri Alexeev",
  title           = "An efficient {MPI\slash OpenMP} parallelization of the
                     {Hartree--Fock--Roothaan} method for the first
                     generation of {Intel{\reg} Xeon Phi{\TM}} processor
                     architecture",
  journal         = j-IJHPCA,
  volume          = "33",
  number          = "1",
  pages           = "212--224",
  day             = "1",
  month           = jan,
  year            = "2019",
  CODEN           = "IHPCFL",
  DOI             = "https://doi.org/10.1177/1094342017732628",
  ISSN            = "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L          = "1094-3420",
  bibdate         = "Wed Oct 9 14:35:53 MDT 2019",
  bibsource       = "http://hpc.sagepub.com/;
                     http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "https://journals.sagepub.com/doi/full/10.1177/1094342017732628",
  acknowledgement = ack-nhfb,
  fjournal        = "International Journal of High Performance Computing
                     Applications",
  journal-URL     = "https://journals.sagepub.com/home/hpc",
}
@Article{Oh:2019:HPT,
  author          = "S. Oh and N. Park and J. Jang and L. Sael and U.
                     Kang",
  title           = "High-Performance {Tucker} Factorization on
                     Heterogeneous Platforms",
  journal         = j-IEEE-TRANS-PAR-DIST-SYS,
  volume          = "30",
  number          = "10",
  pages           = "2237--2248",
  month           = oct,
  year            = "2019",
  CODEN           = "ITDSEO",
  DOI             = "https://doi.org/10.1109/TPDS.2019.2908639",
  ISSN            = "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L          = "1045-9219",
  bibdate         = "Thu Dec 19 09:20:35 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "IEEE Transactions on Parallel and Distributed
                     Systems",
  journal-URL     = "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords        = "Computer science; factor matrices; GPGPU; graph
                     theory; Graphics processing units; GTA scales;
                     GTA-FULL; GTA-PART; heterogeneous computing;
                     Heterogeneous networks; heterogeneous platforms;
                     high-performance Tucker factorization; large-scale
                     multidimensional data; matrix decomposition; Memory
                     management; memory requirements; Motion pictures;
                     OpenCL; row-wise update rule; Scalability; Tensor
                     analysis; tensor factorization algorithms; tucker
                     factorization",
}
@Article{Ortega:2019:CAC,
  author          = "G. Ortega and E. M. T. Hendrix and I. Garc{\'\i}a",
  title           = "A {CUDA} approach to compute perishable inventory
                     control policies using value iteration",
  journal         = j-J-SUPERCOMPUTING,
  volume          = "75",
  number          = "3",
  pages           = "1580--1593",
  month           = mar,
  year            = "2019",
  CODEN           = "JOSUED",
  DOI             = "https://doi.org/10.1007/s11227-018-2692-z",
  ISSN            = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L          = "0920-8542",
  bibdate         = "Thu Oct 10 15:31:17 MDT 2019",
  bibsource       = "http://link.springer.com/journal/11227/75/3;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://link.springer.com/content/pdf/10.1007/s11227-018-2692-z.pdf",
  acknowledgement = ack-nhfb,
  fjournal        = "The Journal of Supercomputing",
  journal-URL     = "http://link.springer.com/journal/11227",
}
@Article{Otero:2019:OAA,
  author          = "Evelyn Otero and Jing Gong and Misun Min and Paul
                     Fischer and Philipp Schlatter and Erwin Laure",
  title           = "{OpenACC} acceleration for the {$ P_N $--$ P_{N - 2}
                     $} algorithm in {Nek5000}",
  journal         = j-J-PAR-DIST-COMP,
  volume          = "132",
  number          = "??",
  pages           = "69--78",
  month           = oct,
  year            = "2019",
  CODEN           = "JPDCER",
  DOI             = "https://doi.org/10.1016/j.jpdc.2019.05.010",
  ISSN            = "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L          = "0743-7315",
  bibdate         = "Fri Sep 13 10:25:20 MDT 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0743731518305549",
  acknowledgement = ack-nhfb,
  fjournal        = "Journal of Parallel and Distributed Computing",
  journal-URL     = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Park:2019:DBO,
  author          = "Sanghyun Park and Taeweon Suh",
  title           = "{DQN}-based {OpenCL} workload partition for
                     performance optimization",
  journal         = j-J-SUPERCOMPUTING,
  volume          = "75",
  number          = "8",
  pages           = "4875--4893",
  month           = aug,
  year            = "2019",
  CODEN           = "JOSUED",
  DOI             = "https://doi.org/10.1007/s11227-019-02766-0",
  ISSN            = "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L          = "0920-8542",
  bibdate         = "Thu Oct 10 15:31:21 MDT 2019",
  bibsource       = "http://link.springer.com/journal/11227/75/8;
                     http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "The Journal of Supercomputing",
  journal-URL     = "http://link.springer.com/journal/11227",
}
@Article{Perez:2019:ATO,
  author          = "B. P{\'e}rez and E. Stafford and J. L. Bosque and R.
                     Beivide and S. Mateo and X. Teruel and X. Martorell and
                     E. Ayguad{\'e}",
  title           = "Auto-tuned {OpenCL} kernel co-execution in {OmpSs} for
                     heterogeneous systems",
  journal         = j-J-PAR-DIST-COMP,
  volume          = "125",
  number          = "??",
  pages           = "45--57",
  month           = mar,
  year            = "2019",
  CODEN           = "JPDCER",
  DOI             = "https://doi.org/10.1016/j.jpdc.2018.11.001",
  ISSN            = "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L          = "0743-7315",
  bibdate         = "Mon Jan 7 07:58:40 MST 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0743731518308189",
  acknowledgement = ack-nhfb,
  fjournal        = "Journal of Parallel and Distributed Computing",
  journal-URL     = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Pikle:2019:AFE,
  author          = "Nileshchandra K. Pikle and Shailesh R. Sathe and
                     Arvind Y. Vyavahare",
  title           = "Accelerating the finite element analysis of
                     functionally graded materials using fixed-grid strategy
                     on {CUDA}-enabled {GPUs}",
  journal         = j-CCPE,
  volume          = "31",
  number          = "17",
  pages           = "e5207:1--e5207:??",
  day             = "10",
  month           = sep,
  year            = "2019",
  CODEN           = "CCPEBO",
  DOI             = "https://doi.org/10.1002/cpe.5207",
  ISSN            = "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L          = "1532-0626",
  bibdate         = "Sat Oct 12 11:00:05 MDT 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "Concurrency and Computation: Practice and Experience",
  journal-URL     = "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate      = "03 April 2019",
}
@Article{Pirkelbauer:2019:BTF,
  author          = "Peter Pirkelbauer and Amalee Wilson and Christina
                     Peterson and Damian Dechev",
  title           = "{Blaze-Tasks}: a Framework for Computing Parallel
                     Reductions over Tasks",
  journal         = j-TACO,
  volume          = "15",
  number          = "4",
  pages           = "66:1--66:??",
  month           = jan,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3293448",
  ISSN            = "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L          = "1544-3566",
  bibdate         = "Tue Jan 8 17:20:00 MST 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract        = "Compared to threads, tasks are a more fine-grained
                     alternative. The task parallel programming model offers
                     benefits in terms of better performance portability and
                     better load-balancing for problems that exhibit
                     nonuniform workloads. A common scenario of task
                     parallel programming is that a task is recursively
                     decomposed into smaller sub-tasks. Depending on the
                     problem domain, the number of created sub-tasks may be
                     nonuniform, thereby creating potential for significant
                     load imbalances in the system. Dynamic load-balancing
                     mechanisms will distribute the tasks across available
                     threads. The final result of a computation may be
                     modeled as a reduction over the results of all
                     sub-tasks. This article describes a simple, yet
                     effective prototype framework, Blaze-Tasks, for task
                     scheduling and task reductions on shared memory
                     architectures. The framework has been designed with
                     lock-free techniques and generic programming principles
                     in mind. Blaze-Tasks is implemented entirely in C++17
                     and is thus portable. To load-balance the computation,
                     Blaze-Tasks uses task stealing. To manage contention on
                     a task pool, the number of lock-free attempts to steal
                     a task depends on the distance between thief and pool
                     owner and the estimated number of tasks in a victim's
                     pool. This article evaluates the Blaze framework on
                     Intel and IBM dual-socket systems using nine benchmarks
                     and compares its performance with other task parallel
                     frameworks. While Cilk outperforms Blaze on Intel on
                     most benchmarks, the evaluation shows that Blaze is
                     competitive with OpenMP and other library-based
                     implementations. On IBM, the experiments show that
                     Blaze outperforms other approaches on most
                     benchmarks.",
  acknowledgement = ack-nhfb,
  articleno       = "66",
  fjournal        = "ACM Transactions on Architecture and Code Optimization
                     (TACO)",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Prades:2019:GJM,
  author          = "J. Prades and F. Silla",
  title           = "{GPU}-Job Migration: The {rCUDA} Case",
  journal         = j-IEEE-TRANS-PAR-DIST-SYS,
  volume          = "30",
  number          = "12",
  pages           = "2718--2729",
  month           = dec,
  year            = "2019",
  CODEN           = "ITDSEO",
  DOI             = "https://doi.org/10.1109/TPDS.2019.2924433",
  ISSN            = "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L          = "1045-9219",
  bibdate         = "Thu Dec 19 09:20:35 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  acknowledgement = ack-nhfb,
  fjournal        = "IEEE Transactions on Parallel and Distributed
                     Systems",
  journal-URL     = "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords        = "CUDA; GPU; Graphics processing units; Middleware;
                     migration; Proposals; rCUDA; Resource management;
                     Virtual machining; virtualization; Virtualization",
}
@Article{Reano:2019:APP,
  author          = "Carlos Rea{\~n}o and Javier Prades and Federico
                     Silla",
  title           = "Analyzing the performance\slash power tradeoff of the
                     {rCUDA} middleware for future exascale systems",
  journal         = j-J-PAR-DIST-COMP,
  volume          = "132",
  number          = "??",
  pages           = "344--362",
  month           = oct,
  year            = "2019",
  CODEN           = "JPDCER",
  ISSN            = "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L          = "0743-7315",
  bibdate         = "Fri Sep 13 10:25:20 MDT 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0743731519303491",
  acknowledgement = ack-nhfb,
  fjournal        = "Journal of Parallel and Distributed Computing",
  journal-URL     = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Reano:2019:SIN,
  author          = "Carlos Rea{\~n}o and Federico Silla",
  title           = "On the support of inter-node {P2P} {GPU} memory copies
                     in {rCUDA}",
  journal         = j-J-PAR-DIST-COMP,
  volume          = "127",
  number          = "??",
  pages           = "28--43",
  month           = may,
  year            = "2019",
  CODEN           = "JPDCER",
  DOI             = "https://doi.org/10.1016/j.jpdc.2018.12.011",
  ISSN            = "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L          = "0743-7315",
  bibdate         = "Thu Mar 14 15:55:59 MDT 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0743731519300255",
  acknowledgement = ack-nhfb,
  fjournal        = "Journal of Parallel and Distributed Computing",
  journal-URL     = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Riebler:2019:TAH,
  author          = "Heinrich Riebler and Gavin Vaz and Tobias Kenter and
                     Christian Plessl",
  title           = "Transparent Acceleration for Heterogeneous Platforms
                     With Compilation to {OpenCL}",
  journal         = j-TACO,
  volume          = "16",
  number          = "2",
  pages           = "14:1--14:??",
  month           = may,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3319423",
  ISSN            = "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L          = "1544-3566",
  bibdate         = "Fri Jul 26 14:25:54 MDT 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract        = "Multi-accelerator platforms combine CPUs and different
                     accelerator architectures within a single compute node.
                     Such systems are capable of processing parallel
                     workloads very efficiently while being more energy
                     efficient than regular systems consisting of CPUs only.
                     However, the architectures of such systems are diverse,
                     forcing developers to port applications to each
                     accelerator using different programming languages,
                     models, tools, and compilers. Developers not only
                     require domain-specific knowledge but also need to
                     understand the low-level accelerator details, leading
                     to an increase in the design effort and costs. To
                     tackle this challenge, we propose a compilation
                     approach and a practical realization called HTrOP that
                     is completely transparent to the user. HTrOP is able to
                     automatically analyze a sequential CPU application,
                     detect computational hotspots, and generate parallel
                     OpenCL host and kernel code. The potential of HTrOP is
                     demonstrated by offloading hotspots to different
                     OpenCL-enabled resources (currently the CPU, the
                     general-purpose GPU, and the manycore Intel Xeon Phi)
                     for a broad set of benchmark applications. We present
                     an in-depth evaluation of our approach in terms of
                     performance gains and energy savings, taking into
                     account all static and dynamic overheads. We are able
                     to achieve speedups and energy savings of up to two
                     orders of magnitude, if an application has sufficient
                     computational intensity, when compared to a natively
                     compiled application.",
  acknowledgement = ack-nhfb,
  articleno       = "14",
  fjournal        = "ACM Transactions on Architecture and Code Optimization
                     (TACO)",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Roth:2019:AOC,
  author          = "{\'A}goston R{\'o}th",
  title           = "Algorithm 992: An {OpenGL}- and {C++}-based Function
                     Library for Curve and Surface Modeling in a Large Class
                     of Extended {Chebyshev} Spaces",
  journal         = j-TOMS,
  volume          = "45",
  number          = "1",
  pages           = "13:1--13:32",
  month           = mar,
  year            = "2019",
  CODEN           = "ACMSCU",
  DOI             = "https://doi.org/10.1145/3284979",
  ISSN            = "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L          = "0098-3500",
  bibdate         = "Mon May 6 18:23:42 MDT 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL             = "https://dl.acm.org/citation.cfm?id=3284979",
  abstract        = "We propose a platform-independent multi-threaded
                     function library that provides data structures to
                     generate, differentiate, and render both the ordinary
                     basis and the normalized B-basis of a user-specified
                     extended Chebyshev (EC) space that comprises the
                     constants and can be identified with the solution space
                     of a constant-coefficient homogeneous linear
                     differential equation defined on a sufficiently small
                     interval. Using the obtained normalized B-bases, our
                     library can also generate, (partially) differentiate,
                     modify, and visualize a large family of so-called
                     B-curves and tensor product B-surfaces. Moreover, the
                     library also implements methods that can be used to
                     perform dimension elevation, to subdivide B-curves and
                     B-surfaces by means of de Casteljau-like B-algorithms,
                     and to generate basis transformations for the
                     B-representation of arbitrary integral curves and
                     surfaces that are described in traditional parametric
                     form by means of the ordinary bases of the underlying
                     EC spaces. Independently of the algebraic, exponential,
                     trigonometric, or mixed type of the applied EC space,
                     the proposed library is numerically stable and
                     efficient up to a reasonable dimension number and may
                     be useful for academics and engineers in the fields of
                     Approximation Theory, Computer Aided Geometric Design,
                     Computer Graphics, and Isogeometric and Numerical
                     Analysis.",
  acknowledgement = ack-nhfb,
  articleno       = "13",
  fjournal        = "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL     = "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{Ruhela:2019:EDM,
  author          = "Amit Ruhela and Hari Subramoni and Sourav Chakraborty
                     and Mohammadreza Bayatpour and Pouya Kousha and
                     Dhabaleswar K. Panda",
  title           = "Efficient design for {MPI} asynchronous progress
                     without dedicated resources",
  journal         = j-PARALLEL-COMPUTING,
  volume          = "85",
  number          = "??",
  pages           = "13--26",
  month           = jul,
  year            = "2019",
  CODEN           = "PACOEJ",
  DOI             = "https://doi.org/10.1016/j.parco.2019.03.003",
  ISSN            = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L          = "0167-8191",
  bibdate         = "Mon Oct 14 16:20:01 MDT 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0167819118303302",
  acknowledgement = ack-nhfb,
  fjournal        = "Parallel Computing",
  journal-URL     = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Sala:2019:IBN,
  author          = "Kevin Sala and Xavier Teruel and Josep M. Perez and
                     Antonio J. Pe{\~n}a and Vicen{\c{c}} Beltran and Jesus
                     Labarta",
  title           = "Integrating blocking and non-blocking {MPI} primitives
                     with task-based programming models",
  journal         = j-PARALLEL-COMPUTING,
  volume          = "85",
  number          = "??",
  pages           = "153--166",
  month           = jul,
  year            = "2019",
  CODEN           = "PACOEJ",
  DOI             = "https://doi.org/10.1016/j.parco.2018.12.008",
  ISSN            = "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L          = "0167-8191",
  bibdate         = "Mon Oct 14 16:20:01 MDT 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0167819118303326",
  acknowledgement = ack-nhfb,
  fjournal        = "Parallel Computing",
  journal-URL     = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Schardl:2019:TER,
  author          = "Tao B. Schardl and William S. Moses and Charles E.
                     Leiserson",
  title           = "{Tapir}: Embedding Recursive Fork-join Parallelism
                     into {LLVM}'s Intermediate Representation",
  journal         = j-TOPC,
  volume          = "6",
  number          = "4",
  pages           = "19:1--19:??",
  month           = dec,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3365655",
  ISSN            = "2329-4949 (print), 2329-4957 (electronic)",
  ISSN-L          = "2329-4949",
  bibdate         = "Fri Dec 27 16:13:12 MST 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/topc.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3365655",
  abstract        = "Tapir (pronounced TAY-per) is a compiler intermediate
                     representation (IR) that embeds recursive fork-join
                     parallelism, as supported by task-parallel programming
                     platforms such as Cilk and OpenMP, into a mainstream
                     compiler's IR. Mainstream compilers typically treat
                     parallel linguistic constructs as syntactic sugar for
                     function calls into a parallel runtime. These calls
                     prevent the compiler from performing optimizations on
                     and across parallel control constructs. Remedying this
                     situation has generally been thought to require an
                     extensive reworking of compiler analyses and code
                     transformations to handle parallel semantics. Tapir
                     leverages the ``serial-projection property,'' which is
                     commonly satisfied by task-parallel programs, to handle
                     the semantics of these programs without an extensive
                     rework of the compiler. For recursive fork-join
                     programs that satisfy the serial-projection property,
                     Tapir enables effective compiler optimization of
                     parallel programs with only minor changes to existing
                     compiler analyses and code transformations. Tapir uses
                     the serial-projection property to order logically
                     parallel fine-grained tasks in the program's
                     control-flow graph. This ordered representation of
                     parallel tasks allows the compiler to optimize parallel
                     codes effectively with only minor modifications. For
                     example, to implement Tapir/LLVM, a prototype of Tapir
                     in the LLVM compiler, we added or modified less than
                     3,000 lines of LLVM's half-million-line core middle-end
                     functionality. These changes sufficed to enable LLVM's
                     existing compiler optimizations for serial
                     code---including loop-invariant-code motion,
                     common-subexpression elimination, and tail-recursion
                     elimination---to work with parallel control constructs
                     such as parallel loops and Cilk's Cilk\_Spawn keyword.
                     Tapir also supports parallel optimizations, such as
                     loop scheduling, which restructure the parallel control
                     flow of the program. By making use of existing LLVM
                     optimizations and new parallel optimizations,
                     Tapir/LLVM can optimize recursive fork-join programs
                     more effectively than traditional compilation methods.
                     On a suite of 35 Cilk application benchmarks,
                     Tapir/LLVM produces more efficient executables for 30
                     benchmarks, with faster 18-core running times for 26 of
                     them, compared to a nearly identical compiler that
                     compiles parallel linguistic constructs the traditional
                     way.",
  acknowledgement = ack-nhfb,
  articleno       = "19",
  fjournal        = "ACM Transactions on Parallel Computing",
  journal-URL     = "http://dl.acm.org/citation.cfm?id=2632163",
}
@Article{Searles:2019:MOA,
  author          = "Robert Searles and Sunita Chandrasekaran and Wayne
                     Joubert and Oscar Hernandez",
  title           = "{MPI + OpenACC}: Accelerating radiation transport
                     mini-application, minisweep, on heterogeneous systems",
  journal         = j-COMP-PHYS-COMM,
  volume          = "236",
  number          = "??",
  pages           = "176--187",
  month           = mar,
  year            = "2019",
  CODEN           = "CPHCBZ",
  DOI             = "https://doi.org/10.1016/j.cpc.2018.10.007",
  ISSN            = "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L          = "0010-4655",
  bibdate         = "Mon Jan 28 16:49:58 MST 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "http://www.sciencedirect.com/science/article/pii/S0010465518303552",
  acknowledgement = ack-nhfb,
  fjournal        = "Computer Physics Communications",
  journal-URL     = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Sharif:2019:APC,
  author          = "Hashim Sharif and Prakalp Srivastava and Muhammad
                     Huzaifa and Maria Kotsifakou and Keyur Joshi and Yasmin
                     Sarita and Nathan Zhao and Vikram S. Adve and Sasa
                     Misailovic and Sarita Adve",
  title           = "{ApproxHPVM}: a portable compiler {IR} for
                     accuracy-aware optimizations",
  journal         = j-PACMPL,
  volume          = "3",
  number          = "OOPSLA",
  pages           = "186:1--186:30",
  month           = oct,
  year            = "2019",
  DOI             = "https://doi.org/10.1145/3360612",
  bibdate         = "Fri Aug 7 19:22:30 MDT 2020",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/pacmpl.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "https://dl.acm.org/doi/abs/10.1145/3360612",
  abstract        = "We propose ApproxHPVM, a compiler IR and system
                     designed to enable accuracy-aware performance and
                     energy tuning on heterogeneous systems with multiple
                     compute units and approximation methods. ApproxHPVM
                     automatically translates end-to-end
                     application-portability across heterogeneous hardware
                     platforms and enables future capabilities like
                     accuracy-aware dynamic scheduling and design space
                     exploration.\par
                     ApproxHPVM incorporates three main components: (a) a
                     compiler IR with hardware-agnostic approximation
                     metrics, (b) a hardware-agnostic accuracy-tuning phase
                     to identify error-tolerant computations, and (c) an
                     accuracy-aware hardware scheduler that maps
                     error-tolerant computations to approximate hardware
                     components. As ApproxHPVM does not incorporate any
                     hardware-specific knowledge as part of the IR, it can
                     serve as a portable virtual ISA that can be shipped to
                     all kinds of hardware platforms.\par
                     We evaluate our framework on nine benchmarks from the
                     deep learning domain and five image processing
                     benchmarks. Our results show that our framework can
                     offload chunks of approximable computations to
                     special-purpose accelerators that provide significant
                     gains in performance and energy, while staying within
                     user-specified application-level quality metrics with
                     high probability. Across the 14 benchmarks, we observe
                     from $1$--$ 9 \times $ performance speedups and $
                     1.1$--$ 11.3 \times $ energy reduction for very small
                     reductions in accuracy.",
  acknowledgement = ack-nhfb,
  articleno       = "186",
  fjournal        = "Proceedings of the ACM on Programming Languages",
  journal-URL     = "https://pacmpl.acm.org/",
}
@Article{Shea:2019:HSD,
  author          = "Colin Shea and Tinoosh Mohsenin",
  title           = "Heterogeneous Scheduling of Deep Neural Networks for
                     Low-power Real-time Designs",
  journal         = j-JETC,
  volume          = "15",
  number          = "4",
  pages           = "36:1--36:??",
  month           = dec,
  year            = "2019",
  CODEN           = "????",
  DOI             = "https://doi.org/10.1145/3358699",
  ISSN            = "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L          = "1550-4832",
  bibdate         = "Tue Dec 17 07:50:24 MST 2019",
  bibsource       = "http://www.math.utah.edu/pub/tex/bib/jetc.bib;
                     http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL             = "https://dl.acm.org/ft_gateway.cfm?id=3358699",
  abstract        = "Deep neural networks have become the readiest answer
                     to a range of application challenges including image
                     recognition, stock analysis, natural language
                     processing, and biomedical applications such as seizure
                     detection. All while outperforming prior leading
                     solutions that relied heavily on hand-engineered
                     techniques. However, deployment of these neural
                     networks often requires high-computational and
                     memory-intensive solutions. These requirements make it
                     challenging to deploy Deep Neural Networks (DNNs) in
                     embedded, real-time low-power applications where
                     classic architectures, GPUs and CPUs, still impose
                     significant power burden. Systems-on-Chip (SoC) with
                     Field-programmable Gate Arrays (FPGAs) can be used to
                     improve performance and allow more fine-grain control
                     of resources than CPUs or GPUs, but it is difficult to
                     find the optimal balance between hardware and software
                     to improve DNN efficiency. In the current research
                     literature there have been few proposed solutions to
                     address optimizing hardware and software deployments of
                     DNNs in embedded low-power systems. To address the
                     computation resource restriction and low-power needs
                     for deploying these networks, we describe and implement
                     a domain-specific metric model for optimizing task
                     deployment on differing platforms, hardware and
                     software. Next, we propose a DNN hardware accelerator
                     called Scalable Low-power Accelerator for real-time
                     deep neural Networks (SCALENet) that includes
                     multithreaded software workers. Finally, we propose a
                     heterogeneous aware scheduler that uses the
                     DNN-specific metric models and the SCALENet accelerator
                     to allocate a task to a resource based on solving a
                     numerical cost for a series of domain objectives. To
                     demonstrate the applicability of our contribution, we
                     deploy nine modern deep network architectures, each
                     containing a different number of parameters within the
                     context of two different neural network applications:
                     image processing and biomedical seizure detection.
                     Utilizing the metric modeling techniques integrated
                     into the heterogeneous aware scheduler and the SCALENet
                     accelerator, we demonstrate the ability to meet
                     computational requirements, adapt to multiple
                     architectures, and lower power by providing an
                     optimized task to resource allocation. Our
                     heterogeneous aware scheduler improves power saving by
                     decreasing power consumption by 10\% of the total
                     system power, does not affect the accuracy of the
                     networks, and still meets the real-time deadlines. We
                     demonstrate the ability to achieve parity with or
                     exceed the energy efficiency of NVIDIA GPUs when
                     evaluated against Jetson TK1 with embedded GPU SoC and
                     with a 4$ \times $ power savings in a power envelope of
                     2.0W. When compared to existing FPGA-based
                     accelerators, SCALENet's accelerator and heterogeneous
                     aware scheduler achieves a 4$ \times $ improvement in
                     energy efficiency.",
  acknowledgement = ack-nhfb,
  articleno       = "36",
  fjournal        = "ACM Journal on Emerging Technologies in Computing
                     Systems (JETC)",
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J967",
}
@Article{Shekofteh:2019:MSG,
  author =       "S.-Kazem Shekofteh and Hamid Noori and Mahmoud
                 Naghibzadeh and Hadi Sadoghi Yazdi and Holger
                 Fr{\"o}ning",
  title =        "Metric Selection for {GPU} Kernel Classification",
  journal =      j-TACO,
  volume =       "15",
  number =       "4",
  pages =        "68:1--68:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3295690",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Tue Jan 8 17:20:00 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Graphics Processing Units (GPUs) are vastly used for
                 running massively parallel programs. GPU kernels
                 exhibit different behavior at runtime and can usually
                 be classified in a simple form as either
                 ``compute-bound'' or ``memory-bound.'' Recent GPUs are
                 capable of concurrently running multiple kernels, which
                 raises the question of how to most appropriately
                 schedule kernels to achieve higher performance. In
                 particular, co-scheduling of compute-bound and
                 memory-bound kernels seems promising. However, its
                 benefits as well as drawbacks must be determined along
                 with which kernels should be selected for a concurrent
                 execution. Classifying kernels can be performed online
                 by instrumentation based on performance counters. This
                 work conducts a thorough analysis of the metrics
                 collected from various benchmarks from Rodinia and CUDA
                 SDK. The goal is to find the minimum number of
                 effective metrics that enables online classification of
                 kernels with a low overhead. This study employs a
                 wrapper-based feature selection method based on the
                 Fisher feature selection criterion. The results of
                 experiments show that to classify kernels with a high
                 accuracy, only three and five metrics are sufficient on
                 a Kepler and a Pascal GPU, respectively. The proposed
                 method is then utilized for a runtime scheduler. The
                 results show an average speedup of 1.18$ \times $ and
                 1.1$ \times $ compared with a serial and a random
                 scheduler, respectively.",
  acknowledgement = ack-nhfb,
  articleno =    "68",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Shterenlikht:2019:MVF,
  author =       "Anton Shterenlikht and Luis Cebamanos",
  title =        "{MPI} vs {Fortran} coarrays beyond 100k cores: {$3$D}
                 cellular automata",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "84",
  number =       "??",
  pages =        "37--49",
  month =        may,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/fortran3.bib;
                 http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303181",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Simmendinger:2019:ISG,
  author =       "Christian Simmendinger and Roman Iakymchuk and Luis
                 Cebamanos and Dana Akhmetova and Valeria Bartsch and
                 Tiberiu Rotaru and Mirko Rahn and Erwin Laure and
                 Stefano Markidis",
  title =        "Interoperability strategies for {GASPI} and {MPI} in
                 large-scale scientific applications",
  journal =      j-IJHPCA,
  volume =       "33",
  number =       "3",
  pages =        "554--568",
  day =          "1",
  month =        may,
  year =         "2019",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342018808359",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Wed Oct 9 14:35:53 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://journals.sagepub.com/doi/full/10.1177/1094342018808359",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
}
@Article{Song:2019:PGA,
  author =       "You Song and Siyu Yang and Jinzhi Lei",
  title =        "{ParaCells}: a {GPU} Architecture for Cell-Centered
                 Models in Computational Biology",
  journal =      j-TCBB,
  volume =       "16",
  number =       "3",
  pages =        "994--1006",
  month =        may,
  year =         "2019",
  CODEN =        "ITCBCY",
  DOI =          "https://doi.org/10.1109/TCBB.2018.2814570",
  ISSN =         "1545-5963 (print), 1557-9964 (electronic)",
  ISSN-L =       "1545-5963",
  bibdate =      "Fri Aug 23 11:22:19 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tcbb.bib",
  abstract =     "In computational biology, the hierarchy of biological
                 systems requires the development of flexible and
                 powerful computational tools. Graphics processing unit
                 (GPU) architecture has been a suitable device for
                 parallel computing in simulating multi-cellular
                 systems. However, in modeling complex biological
                 systems, scientists often face two tasks, mathematical
                 formulation and skillful programming. In particular,
                 specific programming skills are needed for GPU
                 programming. Therefore, the development of an
                 easy-to-use computational architecture, which utilizes
                 GPU for parallel computing and provides intuitive
                 interfaces for simple implementation, is needed so that
                 general scientists can perform GPU simulations without
                 knowing much about the GPU architecture. Here, we
                 introduce ParaCells, a cell-centered GPU simulation
                 architecture for NVIDIA compute unified device
                 architecture (CUDA). ParaCells was designed as a
                 versatile architecture that connects the user logic in
                 C++ with NVIDIA CUDA runtime and is specific to the
                 modeling of multi-cellular systems. An advantage of
                 ParaCells is its object-oriented model declaration,
                 which allows it to be widely applied to many biological
                 systems through the combination of basic biological
                 concepts. We test ParaCells with two applications. Both
                 applications are significantly faster when compared
                 with sequential as well as parallel OpenMP and OpenACC
                 implementations. Moreover, the simulation programs
                 based on ParaCells are cleaner and more readable than
                 other versions.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE/ACM Transactions on Computational Biology and
                 Bioinformatics",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J954",
}
@Article{Speck:2019:APP,
  author =       "Robert Speck",
  title =        "{Algorithm 997}: {pySDC} --- Prototyping Spectral
                 Deferred Corrections",
  journal =      j-TOMS,
  volume =       "45",
  number =       "3",
  pages =        "35:1--35:23",
  month =        aug,
  year =         "2019",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/3310410",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Tue Sep 3 17:49:22 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/citation.cfm?id=3310410",
  abstract =     "In this article, we present the Python framework pySDC
                 for solving collocation problems with spectral deferred
                 correction (SDC) methods and their time-parallel
                 variant PFASST, the parallel full approximation scheme
                 in space and time. pySDC features many implementations
                 of SDC and PFASST, from simple implicit timestepping to
                 high-order implicit-explicit or multi-implicit
                 splitting and multilevel SDCs. The software package
                 comes with many different, preimplemented examples and
                 has seven tutorials to help new users with their first
                 steps. Time parallelism is implemented either in an
                 emulated way for debugging and prototyping or using MPI
                 for benchmarking. The code is fully documented and
                 tested using continuous integration, including most
                 results of previous publications. Here, we describe the
                 structure of the code by taking two different
                 perspectives: those of the user and those of the
                 developer. The first sheds light on the front-end, the
                 examples, and the tutorials, and the second is used to
                 describe the underlying implementation and the data
                 structures. We show three different examples to
                 highlight various aspects of the implementation, the
                 capabilities, and the usage of pySDC. In addition,
                 couplings to the FEniCS framework and PETSc, the latter
                 including spatial parallelism with MPI, are
                 described.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "http://dl.acm.org/pub.cfm?id=J782",
}
@Article{St-Onge:2019:ESS,
  author =       "Guillaume St-Onge and Jean-Gabriel Young and Laurent
                 H{\'e}bert-Dufresne and Louis J. Dub{\'e}",
  title =        "Efficient sampling of spreading processes on complex
                 networks using a composition and rejection algorithm",
  journal =      j-COMP-PHYS-COMM,
  volume =       "240",
  number =       "??",
  pages =        "30--37",
  month =        jul,
  year =         "2019",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2019.02.008",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Fri Jun 14 08:12:51 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465519300608",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Sultana:2019:FRB,
  author =       "Nawrin Sultana and Martin R{\"u}fenacht and Anthony
                 Skjellum and Ignacio Laguna and Kathryn Mohror",
  title =        "Failure recovery for bulk synchronous applications
                 with {MPI} stages",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "84",
  number =       "??",
  pages =        "1--14",
  month =        may,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819118303260",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Tang:2019:MNT,
  author =       "Yibin Tang and Ying Wang and Huawei Li and Xiaowei
                 Li",
  title =        "{MV-Net}: Toward Real-Time Deep Learning on Mobile
                 {GPGPU} Systems",
  journal =      j-JETC,
  volume =       "15",
  number =       "4",
  pages =        "35:1--35:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3358696",
  ISSN =         "1550-4832 (print), 1550-4840 (electronic)",
  ISSN-L =       "1550-4832",
  bibdate =      "Tue Dec 17 07:50:24 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jetc.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3358696",
  abstract =     "Recently the development of deep learning has been
                 propelling the sheer growth of vision and speech
                 applications on lightweight embedded and mobile
                 systems. However, the limitation of computation
                 resource and power delivery capability in embedded
                 platforms is recognized as a significant bottleneck
                 that prevents the systems from providing real-time deep
                 learning ability, since the inference of deep
                 convolutional neural networks (CNNs) and recurrent
                 neural networks (RNNs) involves large quantities of
                 weights and operations. Particularly, how to provide
                 quality-of-services (QoS)-guaranteed neural network
                 inference ability in the multitask execution
                 environment of multicore SoCs is even more complicated
                 due to the existence of resource contention. In this
                 article, we present a novel deep neural network
                 architecture, MV-Net, which provides performance
                 elasticity and contention-aware self-scheduling ability
                 for QoS enhancement in mobile computing systems. When
                 the constraints of QoS, output accuracy, and resource
                 contention status of the system change, MV-Net can
                 dynamically reconfigure the corresponding neural
                 network propagation paths and thus achieves an
                 effective tradeoff between neural network computational
                 complexity and prediction accuracy via approximate
                 computing. The experimental results show that (1)
                 MV-Net significantly improves the performance
                 flexibility of current CNN models and makes it possible
                 to provide always-guaranteed QoS in a multitask
                 environment, and (2) it satisfies the
                 quality-of-results (QoR) requirement, outperforming the
                 baseline implementation significantly, and improves the
                 system energy efficiency at the same time.",
  acknowledgement = ack-nhfb,
  articleno =    "35",
  fjournal =     "ACM Journal on Emerging Technologies in Computing
                 Systems (JETC)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J967",
}
@Article{Tang:2019:QDL,
  author =       "Xulong Tang and Ashutosh Pattnaik and Onur Kayiran and
                 Adwait Jog and Mahmut Taylan Kandemir and Chita Das",
  title =        "Quantifying Data Locality in Dynamic Parallelism in
                 {GPUs}",
  journal =      j-SIGMETRICS,
  volume =       "47",
  number =       "1",
  pages =        "25--26",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3376930.3376947",
  ISSN =         "0163-5999 (print), 1557-9484 (electronic)",
  ISSN-L =       "0163-5999",
  bibdate =      "Mon Jan 27 06:15:26 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/sigmetrics.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3376930.3376947",
  abstract =     "Dynamic parallelism (DP) is a new feature of emerging
                 GPUs that allows new kernels to be generated and
                 scheduled from the device-side (GPU) without the
                 host-side (CPU) intervention. To efficiently support
                 DP, one of the major challenges is to saturate the
                 \ldots{}",
  acknowledgement = ack-nhfb,
  fjournal =     "ACM SIGMETRICS Performance Evaluation Review",
  journal-URL =  "https://dl.acm.org/loi/sigmetrics",
}
@Article{Teijeiro:2019:OPS,
  author =       "Carlos Teijeiro and Thomas Hammerschmidt and Ralf
                 Drautz and Godehard Sutmann",
  title =        "Optimized parallel simulations of analytic bond-order
                 potentials on hybrid shared\slash distributed memory
                 with {MPI} and {OpenMP}",
  journal =      j-IJHPCA,
  volume =       "33",
  number =       "2",
  pages =        "227--241",
  day =          "1",
  month =        mar,
  year =         "2019",
  CODEN =        "IHPCFL",
  DOI =          "https://doi.org/10.1177/1094342017727060",
  ISSN =         "1094-3420 (print), 1741-2846 (electronic)",
  ISSN-L =       "1094-3420",
  bibdate =      "Wed Oct 9 14:35:53 MDT 2019",
  bibsource =    "http://hpc.sagepub.com/;
                 http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://journals.sagepub.com/doi/full/10.1177/1094342017727060",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of High Performance Computing
                 Applications",
  journal-URL =  "https://journals.sagepub.com/home/hpc",
}
@Article{Teunissen:2019:GML,
  author =       "J. Teunissen and R. Keppens",
  title =        "A geometric multigrid library for quadtree\slash
                 octree {AMR} grids coupled to {MPI-AMRVAC}",
  journal =      j-COMP-PHYS-COMM,
  volume =       "245",
  number =       "??",
  pages =        "106866:1--106866:??",
  month =        dec,
  year =         "2019",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2019.106866",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Tue Oct 29 11:44:58 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S001046551930253X",
  acknowledgement = ack-nhfb,
  articleno =    "106866",
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Tian:2019:GAB,
  author =       "Tian Tian and Dunwei Gong and Fei-Ching Kuo and Huai
                 Liu",
  title =        "Genetic algorithm based test data generation for {MPI}
                 parallel programs with blocking communication",
  journal =      j-J-SYST-SOFTW,
  volume =       "155",
  number =       "??",
  pages =        "130--144",
  month =        sep,
  year =         "2019",
  CODEN =        "JSSODM",
  ISSN =         "0164-1212 (print), 1873-1228 (electronic)",
  ISSN-L =       "0164-1212",
  bibdate =      "Wed Oct 16 06:54:20 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jsystsoftw.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0164121219300810",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Systems and Software",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01641212",
}
@Article{Tu:2019:AOS,
  author =       "Chia-Heng Tu and Te-Sheng Lin",
  title =        "Augmenting Operating Systems with {OpenCL}
                 Accelerators",
  journal =      j-TODAES,
  volume =       "24",
  number =       "3",
  pages =        "30:1--30:29",
  month =        jun,
  year =         "2019",
  CODEN =        "ATASFO",
  DOI =          "https://doi.org/10.1145/3315569",
  ISSN =         "1084-4309 (print), 1557-7309 (electronic)",
  ISSN-L =       "1084-4309",
  bibdate =      "Thu Jan 30 09:00:30 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/todaes.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3315569",
  abstract =     "Heterogeneous computing leverages more than one kind
                 of processors to boost the performance of user-space
                 applications with the heterogeneous programming
                 languages, e.g., OpenCL. While some works have been
                 done to accelerate the computations required by
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "30",
  fjournal =     "ACM Transactions on Design Automation of Electronic
                 Systems",
  journal-URL =  "https://dl.acm.org/loi/todaes",
}
@Article{Utterback:2019:POR,
  author =       "Robert Utterback and Kunal Agrawal and I-Ting Angelina
                 Lee and Milind Kulkarni",
  title =        "Processor-Oblivious Record and Replay",
  journal =      j-TOPC,
  volume =       "6",
  number =       "4",
  pages =        "20:1--20:??",
  month =        dec,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3365659",
  ISSN =         "2329-4949 (print), 2329-4957 (electronic)",
  ISSN-L =       "2329-4949",
  bibdate =      "Fri Dec 27 16:13:12 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/topc.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3365659",
  abstract =     "Record-and-replay systems are useful tools for
                 debugging non-deterministic parallel programs by first
                 recording an execution and then replaying that
                 execution to produce the same access pattern. Existing
                 record-and-replay systems generally target thread-based
                 execution models, and record the behaviors and
                 interleavings of individual threads. Dynamic
                 multithreaded languages and libraries, such as the Cilk
                 family, OpenMP, TBB, and the like, do not have a notion
                 of threads. Instead, these languages provide a
                 processor-oblivious model of programming, where
                 programs expose task parallelism using high-level
                 constructs such as spawn/sync without regard to the
                 number of threads/cores available to run the program.
                 Thread-based record-and-replay would violate the
                 processor-oblivious nature of these programs, as they
                 incorporate the number of threads into the recorded
                 information, constraining the replayed execution to the
                 same number of threads. In this article, we present a
                 processor-oblivious record-and-replay scheme for
                 dynamic multithreaded languages where record and replay
                 can use different number of processors and both are
                 scheduled using work stealing. We provide theoretical
                 guarantees for our record and replay scheme --- namely
                 that record is optimal for programs with one lock and
                 replay is near-optimal for all cases. In addition, we
                 implemented this scheme in the Cilk Plus runtime system
                 and our evaluation indicates that
                 processor-obliviousness does not cause substantial
                 overheads.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Parallel Computing",
  journal-URL =  "http://dl.acm.org/citation.cfm?id=2632163",
}
@Article{Valero-Lara:2019:MTS,
  author =       "Pedro Valero-Lara and Ra{\"u}l Sirvent and Antonio J.
                 Pe{\~n}a and Jes{\'u}s Labarta",
  title =        "{MPI + OpenMP} tasking scalability for
                 multi-morphology simulations of the human brain",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "84",
  number =       "??",
  pages =        "50--61",
  month =        may,
  year =         "2019",
  CODEN =        "PACOEJ",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Oct 14 16:20:01 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S016781911830317X",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Vasilache:2019:NAL,
  author =       "Nicolas Vasilache and Oleksandr Zinenko and Theodoros
                 Theodoridis and Priya Goyal and Zachary Devito and
                 William S. Moses and Sven Verdoolaege and Andrew Adams
                 and Albert Cohen",
  title =        "The Next 700 Accelerated Layers: From Mathematical
                 Expressions of Network Computation Graphs to
                 Accelerated {GPU} Kernels, Automatically",
  journal =      j-TACO,
  volume =       "16",
  number =       "4",
  pages =        "38:1--38:??",
  month =        oct,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3355606",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Sat Oct 12 15:31:26 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/taco.bib",
  abstract =     "Deep learning frameworks automate the deployment,
                 distribution, synchronization, memory allocation, and
                 hardware acceleration of models represented as graphs
                 of computational operators. These operators wrap
                 high-performance libraries such as cuDNN or NNPACK.
                 When the computation does not match any predefined
                 library call, custom operators must be implemented,
                 often at high engineering cost and performance penalty,
                 limiting the pace of innovation. To address this
                 productivity gap, we propose and evaluate: (1) a
                 domain-specific language with a tensor notation close
                 to the mathematics of deep learning; (2) a Just-In-Time
                 optimizing compiler based on the polyhedral framework;
                 (3) carefully coordinated linear optimization and
                 evolutionary algorithms to synthesize high-performance
                 CUDA kernels; (4) the transparent integration of our
                 flow into PyTorch and Caffe2, providing the fully
                 automatic synthesis of high-performance GPU kernels
                 from simple tensor algebra. The performance is
                 comparable to, and often exceeds the performance of,
                 highly tuned libraries.",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J924",
}
@Article{Vitali:2019:EOO,
  author =       "Emanuele Vitali and Davide Gadioli and Gianluca
                 Palermo and Andrea Beccari and Carlo Cavazzoni and
                 Cristina Silvano",
  title =        "Exploiting {OpenMP} and {OpenACC} to accelerate a
                 geometric approach to molecular docking in
                 heterogeneous {HPC} nodes",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "75",
  number =       "7",
  pages =        "3374--3396",
  month =        jul,
  year =         "2019",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-019-02875-w",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Thu Oct 10 15:31:20 MDT 2019",
  bibsource =    "http://link.springer.com/journal/11227/75/7;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Vu:2019:FMT,
  author =       "V. A. Vu and G. Tan",
  title =        "A Framework for Mesoscopic Traffic Simulation in
                 {GPU}",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "30",
  number =       "8",
  pages =        "1691--1703",
  month =        aug,
  year =         "2019",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2896636",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Fri Aug 30 06:09:58 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "Computational modeling; Data models; data parallelism;
                 data structures; demand and supply components; GPU; GPU
                 threads; graphics processing units; Graphics processing
                 units; high-performance computing; innovative data
                 structure; Load modeling; Loading; mesoscopic traffic
                 simulation; Microscopy; optimisation; optimization;
                 road traffic; simulation algorithm; simulation flow;
                 traffic engineering computing; traffic management
                 support capabilities; traffic network; Vehicles",
}
@Article{Waidyasooriya:2019:OBD,
  author =       "Hasitha Muthumala Waidyasooriya and Masanori Hariyama
                 and Masamichi J. Miyama and Masayuki Ohzeki",
  title =        "{OpenCL}-based design of an {FPGA} accelerator for
                 quantum annealing simulation",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "75",
  number =       "8",
  pages =        "5019--5039",
  month =        aug,
  year =         "2019",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-019-02778-w",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Thu Oct 10 15:31:21 MDT 2019",
  bibsource =    "http://link.springer.com/journal/11227/75/8;
                 http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
}
@Article{Wang:2019:FBA,
  author =       "Haomiao Wang and Prabu Thiagaraj and Oliver Sinnen",
  title =        "{FPGA}-based Acceleration of {FT} Convolution for
                 Pulsar Search Using {OpenCL}",
  journal =      j-TRETS,
  volume =       "11",
  number =       "4",
  pages =        "24:1--24:??",
  month =        jan,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3268933",
  ISSN =         "1936-7406 (print), 1936-7414 (electronic)",
  ISSN-L =       "1936-7406",
  bibdate =      "Sat Oct 19 17:43:01 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/trets.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3268933",
  abstract =     "The Square Kilometre Array (SKA) project will be the
                 world's largest radio telescope array. With its large
                 number of antennas, the number of signals that need to
                 be processed is dramatic. One important element of the
                 SKA's Central Signal Processor package is pulsar
                 search. This article focuses on the FPGA-based
                 acceleration of the Frequency-Domain Acceleration
                 Search module, which is a part of SKA pulsar search
                 engine. In this module, the frequency-domain input
                 signals have to be processed by 85 Finite Impulse
                 response (FIR) filters within a short period of
                 limitation and for thousands of input arrays. Because
                 of the large scale of the input length and FIR filter
                 size, even high-end FPGA devices cannot parallelise the
                 task completely. We start by investigating both
                 time-domain FIR filter (TDFIR) and frequency-domain FIR
                 filter (FDFIR) to tackle this task. We applied the
                 overlap-add algorithm to split the coefficient array of
                 TDFIR and the overlap-save algorithm to split the input
                 signals of FDFIR. To achieve fast prototyping design,
                 we employed OpenCL, which is a high-level FPGA
                 development technique. The performance and power
                 consumption are evaluated using multiple FPGA devices
                 simultaneously and compared with GPU results, which is
                 achieved by porting FPGA-based OpenCL kernels. The
                 experimental evaluation shows that the FDFIR solution
                 is very competitive in terms of performance, with a
                 clear energy consumption advantage over the GPU
                 solution.",
  acknowledgement = ack-nhfb,
  articleno =    "24",
  fjournal =     "ACM Transactions on Reconfigurable Technology and
                 Systems (TRETS)",
  journal-URL =  "http://portal.acm.org/toc.cfm?id=J1151",
}
@Article{Wang:2019:MEM,
  author =       "L. Wang and M. Jahre and A. Adileh and Z. Wang and L.
                 Eeckhout",
  title =        "Modeling Emerging Memory-Divergent {GPU}
                 Applications",
  journal =      j-IEEE-COMPUT-ARCHIT-LETT,
  volume =       "18",
  number =       "2",
  pages =        "95--98",
  month =        jul,
  year =         "2019",
  DOI =          "https://doi.org/10.1109/LCA.2019.2923618",
  ISSN =         "1556-6056 (print), 1556-6064 (electronic)",
  ISSN-L =       "1556-6056",
  bibdate =      "Tue Oct 1 10:18:16 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeecomputarchitlett.bib;
                 http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  abstract =     "Analytical performance models yield valuable
                 architectural insight without incurring the excessive
                 runtime overheads of simulation. In this work, we study
                 contemporary GPU applications and find that the key
                 performance-related behavior of such applications is
                 distinct from traditional GPU applications. The key
                 issue is that these GPU applications are
                 memory-intensive and have poor spatial locality, which
                 implies that the loads of different threads commonly
                 access different cache blocks. Such memory-divergent
                 applications quickly exhaust the number of misses the
                 L1 cache can process concurrently, and thereby cripple
                 the GPU's ability to use Memory-Level Parallelism (MLP)
                 and Thread-Level Parallelism (TLP) to hide memory
                 latencies. Our Memory Divergence Model (MDM) is able to
                 accurately represent this behavior and thereby reduces
                 average performance prediction error by $ 14 \times $
                 compared to the state-of-the-art GPUMech approach
                 across our memory-divergent applications.",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Computer Architecture Letters",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=10208",
  keywords =     "Analytical models; analytical performance models;
                 Analytical performance prediction; average performance
                 prediction error; cache blocks; cache storage;
                 Computational modeling; contemporary GPU applications;
                 GPU; graphics processing units; Graphics processing
                 units; Instruction sets; key performance-related
                 behavior; L1 cache; Mathematical model; memory
                 architecture; memory divergence model; memory
                 latencies; memory-divergent applications;
                 memory-divergent GPU applications; memory-intensive;
                 memory-level parallelism; multi-threading;
                 multiprocessing systems; Predictive models; Random
                 access memory; thread-level parallelism; traditional
                 GPU applications; valuable architectural insight",
}
@Article{Warren:2019:CBG,
  author =       "Craig Warren and Antonios Giannopoulos and Alan Gray
                 and Iraklis Giannakis and Alan Patterson and Laura
                 Wetter and Andre Hamrah",
  title =        "A {CUDA}-based {GPU} engine for {gprMax}: Open source
                 {FDTD} electromagnetic simulation software",
  journal =      j-COMP-PHYS-COMM,
  volume =       "237",
  number =       "??",
  pages =        "208--218",
  month =        apr,
  year =         "2019",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2018.11.007",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Wed Feb 6 15:16:58 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/gnu.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465518303990",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Wende:2019:OVT,
  author =       "Florian Wende and Martijn Marsman and Jeongnim Kim and
                 Fedor Vasilev and Zhengji Zhao and Thomas Steinke",
  title =        "{OpenMP} in {VASP}: Threading and {SIMD}",
  journal =      j-IJQC,
  volume =       "119",
  number =       "12",
  pages =        "e25851:1--e25851:??",
  day =          "15",
  month =        jun,
  year =         "2019",
  CODEN =        "IJQCB2",
  DOI =          "https://doi.org/10.1002/qua.25851",
  ISSN =         "0020-7608 (print), 1097-461X (electronic)",
  ISSN-L =       "0020-7608",
  bibdate =      "Wed Oct 9 06:14:07 MDT 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ijqc2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "International Journal of Quantum Chemistry",
  journal-URL =  "http://www.interscience.wiley.com/jpages/0020-7608/",
  onlinedate =   "19 December 2018",
}
@Article{Winkler:2019:GSM,
  author =       "Daniel Winkler and Massoud Rezavand and Michael
                 Meister and Wolfgang Rauch",
  title =        "{gpuSPHASE} --- a shared memory caching implementation
                 for {$2$D} {SPH} using {CUDA} (new version
                 announcement)",
  journal =      j-COMP-PHYS-COMM,
  volume =       "235",
  number =       "??",
  pages =        "514--516",
  month =        feb,
  year =         "2019",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2018.08.016",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Sat Nov 24 07:45:46 MST 2018",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2010.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465518303126",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Wozniak:2019:MJW,
  author =       "Justin M. Wozniak and Matthieu Dorier and Robert Ross
                 and Tong Shu and Tahsin Kurc and Li Tang and Norbert
                 Podhorszki and Matthew Wolf",
  title =        "{MPI} jobs within {MPI} jobs: a practical way of
                 enabling task-level fault-tolerance in {HPC}
                 workflows",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "101",
  number =       "??",
  pages =        "576--589",
  month =        dec,
  year =         "2019",
  CODEN =        "FGSEVI",
  DOI =          "https://doi.org/10.1016/j.future.2019.05.020",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Mon Feb 10 12:55:02 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167739X1830757X",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Wu:2019:PMG,
  author =       "J. Wu and X. Yang and Z. Zhang and G. Chen and R.
                 Mao",
  title =        "A Performance Model for {GPU} Architectures that
                 Considers On-Chip Resources: Application to Medical
                 Image Registration",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "30",
  number =       "9",
  pages =        "1947--1961",
  month =        sep,
  year =         "2019",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2905213",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Fri Aug 30 06:09:58 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "Computational modeling; Computer architecture; CPU;
                 data transfer; Data transfer; GPU architectures;
                 graphics processing unit; graphics processing units;
                 Graphics processing units; image registration; Image
                 registration; medical image processing; medical image
                 registration; NVIDIA GPUs; on-chip GPU resources;
                 on-chip resources; parallel programming; parallel
                 programs; Performance model; performance model;
                 Predictive models; System-on-chip",
}
@Article{Yeh:2019:PGR,
  author =       "Tsung Tai Yeh and Amit Sabne and Putt Sakdhnagool and
                 Rudolf Eigenmann and Timothy G. Rogers",
  title =        "{Pagoda}: a {GPU} Runtime System for Narrow Tasks",
  journal =      j-TOPC,
  volume =       "6",
  number =       "4",
  pages =        "21:1--21:??",
  month =        nov,
  year =         "2019",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3365657",
  ISSN =         "2329-4949 (print), 2329-4957 (electronic)",
  ISSN-L =       "2329-4949",
  bibdate =      "Wed Nov 20 07:59:59 MST 2019",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/topc.bib",
  abstract =     "Massively multithreaded GPUs achieve high throughput
                 by running thousands of threads in parallel. To fully
                 utilize the hardware, contemporary workloads
                 spawn work to the GPU in bulk by launching large tasks,
                 where each task is a kernel that contains thousands of
                 threads that occupy the entire GPU. GPUs face severe
                 underutilization and their performance benefits vanish
                 if the tasks are narrow, i.e., they contain less than
                 512 threads. Latency-sensitive applications in network,
                 signal, and image processing that generate a large
                 number of tasks with relatively small inputs are
                 examples of such limited parallelism. This article
                 presents Pagoda, a runtime system that virtualizes GPU
                 resources, using an OS-like daemon kernel called
                 MasterKernel. Tasks are spawned from the CPU onto
                 Pagoda as they become available, and are scheduled by
                 the MasterKernel at the warp granularity. This level of
                 control enables the GPU to keep scheduling and
                 executing tasks as long as free warps are found,
                 dramatically reducing underutilization. Experimental
                 results on real hardware demonstrate that Pagoda
                 achieves a geometric mean speedup of 5.52X over
                 PThreads running on a 20-core CPU, 1.76X over
                 CUDA-HyperQ, and 1.44X over GeMTC, the state-of-the-art
                 runtime GPU task scheduling system.",
  acknowledgement = ack-nhfb,
  articleno =    "21",
  fjournal =     "ACM Transactions on Parallel Computing",
  journal-URL =  "https://dl.acm.org/loi/topc",
}
@Article{Zaitsev:2019:SLD,
  author =       "D. Zaitsev and S. Tomov and J. Dongarra",
  title =        "Solving Linear {Diophantine} Systems on Parallel
                 Architectures",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "30",
  number =       "5",
  pages =        "1158--1169",
  month =        may,
  year =         "2019",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2018.2873354",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Fri Aug 30 06:09:58 2019",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "application program interfaces; clan; discrete system
                 modeling; discrete-event systems; distributed memory
                 systems; distributed-memory computing nodes;
                 distributing systems; dynamic task-dispatching
                 subsystem; formal languages; linear Diophantine system;
                 linear Diophantine systems-of-equations; logic
                 programming; Mathematical model; mathematics computing;
                 Matrix decomposition; message passing; model checking;
                 MPI; multiple cores; nonnegative integer numbers;
                 OpenMP; parallel architectures; Parallel architectures;
                 parallel-sequential composition; Petri net; Petri nets;
                 polynomials; single indecomposable system; Software
                 algorithms; Sparse matrices; sparse matrices; sparse
                 matrix; speed-up; system clans; Task analysis;
                 two-level parallelization concept",
}
@Article{Adamek:2020:GFC,
  author =       "Karel Ad{\'a}mek and Sofia Dimoudi and Mike Giles and
                 Wesley Armour",
  title =        "{GPU} Fast Convolution via the Overlap-and-Save Method
                 in Shared Memory",
  journal =      j-TACO,
  volume =       "17",
  number =       "3",
  pages =        "18:1--18:20",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3394116",
  ISSN =         "1544-3566 (print), 1544-3973 (electronic)",
  ISSN-L =       "1544-3566",
  bibdate =      "Fri Aug 28 12:02:00 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/taco.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3394116",
  abstract =     "We present an implementation of the overlap-and-save
                 method, a method for the convolution of very long
                 signals with short response functions, which is
                 tailored to GPUs. We have implemented several FFT
                 algorithms (using the CUDA programming language),
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "18",
  fjournal =     "ACM Transactions on Architecture and Code Optimization
                 (TACO)",
  journal-URL =  "https://dl.acm.org/loi/taco",
}
@Article{Al-Mouhamed:2020:RCO,
  author =       "Mayez A. Al-Mouhamed and Ayaz H. Khan and Nazeeruddin
                 Mohammad",
  title =        "A review of {CUDA} optimization techniques and tools
                 for structured grid computing",
  journal =      j-COMPUTING,
  volume =       "102",
  number =       "4",
  pages =        "977--1003",
  month =        apr,
  year =         "2020",
  CODEN =        "CMPTA2",
  DOI =          "https://doi.org/10.1007/s00607-019-00744-1",
  ISSN =         "0010-485X (print), 1436-5057 (electronic)",
  ISSN-L =       "0010-485X",
  bibdate =      "Tue May 12 18:02:15 MDT 2020",
  bibsource =    "http://link.springer.com/journal/607/102/4;
                 http://www.math.utah.edu/pub/tex/bib/computing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing",
  journal-URL =  "http://link.springer.com/journal/607",
}
@Article{Allegretti:2020:OBB,
  author =       "S. Allegretti and F. Bolelli and C. Grana",
  title =        "Optimized Block-Based Algorithms to Label Connected
                 Components on {GPUs}",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "31",
  number =       "2",
  pages =        "423--438",
  month =        feb,
  year =         "2020",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2934683",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed Jan 22 06:09:50 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "connected components labeling; CUDA; GPU; Parallel
                 processing",
}
@Article{Amos:2020:AQQ,
  author =       "Brandon D. Amos and David R. Easterling and Layne T.
                 Watson and William I. Thacker and Brent S. Castle and
                 Michael W. Trosset",
  title =        "{Algorithm 1007}: {QNSTOP} --- Quasi-{Newton}
                 Algorithm for Stochastic Optimization",
  journal =      j-TOMS,
  volume =       "46",
  number =       "2",
  pages =        "17:1--17:20",
  month =        jun,
  year =         "2020",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/3374219",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Fri Jun 12 07:37:53 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3374219",
  abstract =     "QNSTOP consists of serial and parallel (OpenMP)
                 Fortran 2003 codes for the quasi-Newton stochastic
                 optimization method of Castle and Trosset for
                 stochastic search problems. A complete description of
                 QNSTOP for both local search with stochastic objective
                 and global search with ``noisy'' deterministic
                 objective is given here, to the best of our knowledge,
                 for the first time. For stochastic search problems,
                 some convergence theory exists for particular
                 algorithmic choices and parameter values. Both the
                 parallel driver subroutine, which offers several
                 parallel decomposition strategies, and the serial
                 driver subroutine can be used for local stochastic
                 search or global deterministic search, based on an
                 input switch. Some performance data for computational
                 systems biology problems is given.",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "https://dl.acm.org/loi/toms",
}
@Article{Arabnejad:2020:SSC,
  author =       "Hamid Arabnejad and Jo{\~a}o Bispo and Jorge G.
                 Barbosa",
  title =        "Source-to-source compilation targeting {OpenMP}-based
                 automatic parallelization of {C} applications",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "76",
  number =       "9",
  pages =        "6753--6785",
  month =        sep,
  year =         "2020",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-019-03109-9",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Fri May 14 09:19:58 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://link.springer.com/article/10.1007/s11227-019-03109-9",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
  online-date =  "Published: 17 December 2019 Pages: 6753 - 6785",
}
@Article{Awan:2020:CPC,
  author =       "A. A. Awan and A. Jain and C. Chu and H. Subramoni and
                 D. K. Panda",
  title =        "Communication Profiling and Characterization of
                 Deep-Learning Workloads on Clusters With
                 High-Performance Interconnects",
  journal =      j-IEEE-MICRO,
  volume =       "40",
  number =       "1",
  pages =        "35--43",
  month =        jan,
  year =         "2020",
  CODEN =        "IEMIDZ",
  DOI =          "https://doi.org/10.1109/MM.2019.2949986",
  ISSN =         "0272-1732 (print), 1937-4143 (electronic)",
  ISSN-L =       "0272-1732",
  bibdate =      "Wed Jan 22 06:22:53 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeemicro.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Micro",
  journal-URL =  "http://www.computer.org/csdl/mags/mi/index.html",
  keywords =     "Communication Libraries; Deep learning; Distributed
                 computing; Graphics processing units; Heterogeneous
                 networks; Horovod; InfiniBand; Middleware; MVAPICH2
                 MPI; NVLink; Omni-Path; PCIe; Performance Analysis;
                 Performance analysis; Profiling; TensorFlow; Training
                 data",
}
@Article{Baek:2020:ESO,
  author =       "Nakhoon Baek",
  title =        "An emulation scheme for {OpenGL SC 2.0} over
                 {OpenGL}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "76",
  number =       "10",
  pages =        "7951--7960",
  month =        oct,
  year =         "2020",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-018-2399-1",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Fri May 14 09:19:56 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://link.springer.com/article/10.1007/s11227-018-2399-1",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
  online-date =  "Published: 02 May 2018 Pages: 7951 - 7960",
}
@Article{Ballard:2020:TPC,
  author =       "Grey Ballard and Alicia Klinvex and Tamara G. Kolda",
  title =        "{TuckerMPI}: a Parallel {C++\slash MPI} Software
                 Package for Large-scale Data Compression via the
                 {Tucker} Tensor Decomposition",
  journal =      j-TOMS,
  volume =       "46",
  number =       "2",
  pages =        "13:1--13:31",
  month =        jun,
  year =         "2020",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/3378445",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Fri Jun 12 07:37:53 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3378445",
  abstract =     "Our goal is compression of massive-scale
                 grid-structured data, such as the multi-terabyte output
                 of a high-fidelity computational simulation. For such
                 data sets, we have developed a new software package
                 called TuckerMPI, a parallel C++/MPI software package
                 for compressing distributed data. The approach is based
                 on treating the data as a tensor, i.e., a
                 multidimensional array, and computing its truncated
                 Tucker decomposition, a higher-order analogue to the
                 truncated singular value decomposition of a matrix. The
                 result is a low-rank approximation of the original
                 tensor-structured data. Compression efficiency is
                 achieved by detecting latent global structure within
                 the data, which we contrast to most compression methods
                 that are focused on local structure. In this work, we
                 describe TuckerMPI, our implementation of the truncated
                 Tucker decomposition, including details of the data
                 distribution and in-memory layouts, the parallel and
                 serial implementations of the key kernels, and analysis
                 of the storage, communication, and computational costs.
                 We test the software on 4.5 and 6.7 terabyte data sets
                 distributed across 100s of nodes (1,000s of MPI
                 processes), achieving compression ratios between 100
                 and 200,000$ \times $, which equates to 99--99.999\%
                 compression (depending on the desired accuracy) in
                 substantially less time than it would take to even read
                 the same dataset from a parallel file system. Moreover,
                 we show that our method also allows for reconstruction
                 of partial or down-sampled data on a single node,
                 without a parallel computer so long as the
                 reconstructed portion is small enough to fit on a
                 single machine, e.g., in the instance of
                 reconstructing/visualizing a single down-sampled time
                 step or computing summary statistics. The code is
                 available at https://gitlab.com/tensors/TuckerMPI.",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "https://dl.acm.org/loi/toms",
}
@Article{Barreda:2020:IFC,
  author =       "Mar{\'\i}a Barreda and Jos{\'e} I. Aliaga and Marc
                 Casas",
  title =        "Iteration-fusing conjugate gradient for sparse linear
                 systems with {MPI + OmpSs}",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "76",
  number =       "9",
  pages =        "6669--6689",
  month =        sep,
  year =         "2020",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-019-03100-4",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Fri May 14 09:19:58 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://link.springer.com/article/10.1007/s11227-019-03100-4",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
  online-date =  "Published: 10 December 2019 Pages: 6669 - 6689",
}
@Article{Bernholdt:2020:SMU,
  author =       "David E. Bernholdt and Swen Boehm and George Bosilca
                 and Manjunath Gorentla Venkata and Ryan E. Grant and
                 Thomas Naughton and Howard P. Pritchard and Martin
                 Schulz and Geoffroy R. Vallee",
  title =        "A survey of {MPI} usage in the {US} exascale computing
                 project",
  journal =      j-CCPE,
  volume =       "32",
  number =       "3",
  pages =        "e4851:1--e4851:??",
  day =          "10",
  month =        feb,
  year =         "2020",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4851",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Wed Mar 31 07:52:13 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Concurr. Comput.",
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "27 September 2018",
}
@Article{Bombieri:2020:MIB,
  author =       "N. Bombieri and F. Busato and A. Danese and L.
                 Piccolboni and G. Pravadelli",
  title =        "{Mangrove}: An Inference-Based Dynamic Invariant
                 Mining for {GPU} Architectures",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "69",
  number =       "4",
  pages =        "606--620",
  month =        apr,
  year =         "2020",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2019.2953846",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Thu Mar 12 16:58:27 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
  keywords =     "GPUs; inference; Invariant mining",
}
@Article{Cabral:2020:EMO,
  author =       "Frederico L. Cabral and Sanderson L. Gonzaga de
                 Oliveira and Carla Osthoff and Gabriel P. Costa and
                 Diego N. Brand{\~a}o and Mauricio Kischinhevsky",
  title =        "An evaluation of {MPI} and {OpenMP} paradigms in
                 finite-difference explicit methods for {PDEs} on
                 shared-memory multi- and manycore systems",
  journal =      j-CCPE,
  volume =       "32",
  number =       "20",
  pages =        "e5642:1--e5642:??",
  day =          "25",
  month =        oct,
  year =         "2020",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.5642",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Wed Mar 31 07:52:20 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Concurr. Comput.",
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "29 December 2019",
}
@Article{Cesarini:2020:CSR,
  author =       "D. Cesarini and A. Bartolini and A. Borghesi and C.
                 Cavazzoni and M. Luisier and L. Benini",
  title =        "Countdown Slack: a Run-Time Library to Reduce Energy
                 Footprint in Large-Scale {MPI} Applications",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "31",
  number =       "11",
  pages =        "2696--2709",
  month =        nov,
  year =         "2020",
  CODEN =        "ITDSEO",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Sat Aug 15 14:52:38 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
}
@Article{Chakraborty:2020:ESE,
  author =       "Sourav Chakraborty and Ignacio Laguna and Murali Emani
                 and Kathryn Mohror and Dhabaleswar K. Panda and Martin
                 Schulz and Hari Subramoni",
  title =        "{EReinit}: Scalable and efficient fault-tolerance for
                 bulk-synchronous {MPI} applications",
  journal =      j-CCPE,
  volume =       "32",
  number =       "3",
  pages =        "e4863:1--e4863:??",
  day =          "10",
  month =        feb,
  year =         "2020",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4863",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Wed Mar 31 07:52:13 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Concurr. Comput.",
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "14 August 2018",
}
@Article{Chang:2020:ADI,
  author =       "Tyler H. Chang and Layne T. Watson and Thomas C. H.
                 Lux and Ali R. Butt and Kirk W. Cameron and Yili Hong",
  title =        "{Algorithm 1012}: {DELAUNAYSPARSE}: Interpolation via
                 a Sparse Subset of the {Delaunay} Triangulation in
                 Medium to High Dimensions",
  journal =      j-TOMS,
  volume =       "46",
  number =       "4",
  pages =        "38:1--38:20",
  month =        nov,
  year =         "2020",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/3422818",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Sat Nov 14 07:15:52 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3422818",
  abstract =     "DELAUNAYSPARSE contains both serial and parallel codes
                 written in Fortran 2003 (with OpenMP) for performing
                 medium- to high-dimensional interpolation via the
                 Delaunay triangulation. To accommodate the exponential
                 growth in the size of the Delaunay \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "38",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "https://dl.acm.org/loi/toms",
}
@Article{Cho:2020:PMP,
  author =       "Y. Cho and S. Oh and B. Egger",
  title =        "Performance Modeling of Parallel Loops on Multi-Socket
                 Platforms Using Queueing Systems",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "31",
  number =       "2",
  pages =        "318--331",
  month =        feb,
  year =         "2020",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2938172",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed Jan 22 06:09:50 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "Computational modeling; Dynamic scheduling;
                 multi-socket system; Multicore processing; NUMA;
                 OpenMP; parallel loop; Performance modeling; Predictive
                 models; queueing system; Servers; Time factors",
}
@Article{Daleiden:2020:GPP,
  author =       "Patrick Daleiden and Andreas Stefik and Philip Merlin
                 Uesbeck",
  title =        "{GPU} Programming Productivity in Different
                 Abstraction Paradigms: a Randomized Controlled Trial
                 Comparing {CUDA} and Thrust",
  journal =      j-TOCE,
  volume =       "20",
  number =       "4",
  pages =        "27:1--27:27",
  month =        nov,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3418301",
  ISSN =         "1946-6226",
  ISSN-L =       "1946-6226",
  bibdate =      "Sat Mar 20 18:20:46 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/toce.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3418301",
  abstract =     "Coprocessor architectures in High Performance
                 Computing are prevalent in today's scientific computing
                 clusters and require specialized knowledge for proper
                 utilization. Various alternative paradigms for parallel
                 and offload computation exist, but little \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "27",
  fjournal =     "ACM Transactions on Computing Education",
  journal-URL =  "https://dl.acm.org/loi/toce",
}
@Article{Davydov:2020:ADS,
  author =       "Denis Davydov and Martin Kronbichler",
  title =        "Algorithms and Data Structures for Matrix-Free Finite
                 Element Operators with {MPI}-Parallel Sparse
                 Multi-Vectors",
  journal =      j-TOPC,
  volume =       "7",
  number =       "3",
  pages =        "20:1--20:30",
  month =        aug,
  year =         "2020",
  CODEN =        "????",
  DOI =          "https://doi.org/10.1145/3399736",
  ISSN =         "2329-4949 (print), 2329-4957 (electronic)",
  ISSN-L =       "2329-4949",
  bibdate =      "Thu Aug 6 08:56:07 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/topc.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3399736",
  abstract =     "Traditional solution approaches for problems in
                 quantum mechanics scale as $ O(M^3) $, where $M$ is the
                 number of electrons. Various methods have been proposed
                 to address this issue and obtain a linear scaling $
                 O(M)$. One promising formulation is the direct
                 minimization of energy. Such methods take advantage of
                 physical localization of the solution, allowing users
                 to seek it in terms of non-orthogonal orbitals with
                 local support.\par
                 This work proposes a numerically efficient
                 implementation of sparse parallel vectors within the
                 open-source finite element library deal.II. The main
                 algorithmic ingredient is the matrix-free evaluation of
                 the Hamiltonian operator by cell-wise quadrature. Based
                 on an a-priori chosen support for each vector, we
                 develop algorithms and data structures to perform (i)
                 matrix-free sparse matrix multivector products (SpMM),
                 (ii) the projection of an operator onto a sparse
                 sub-space (inner products), and (iii)
                 post-multiplication of a sparse multivector with a
                 square matrix. The node-level performance is analyzed
                 using a roofline model. Our matrix-free implementation
                 of finite element operators with sparse multivectors
                 achieves a performance of 157 GFlop/s on an Intel
                 Cascade Lake processor with 20 cores. Strong and weak
                 scaling results are reported for a representative
                 benchmark problem using quadratic and quartic finite
                 element bases.",
  acknowledgement = ack-nhfb,
  articleno =    "20",
  fjournal =     "ACM Transactions on Parallel Computing",
  journal-URL =  "https://dl.acm.org/loi/topc",
}
@Article{Deng:2020:CCB,
  author =       "Y. Deng and T. Li and Y. Luo and X. Zhao",
  title =        "Corrections to {``CUDA-Based Volume Rendering and
                 Inspection for Time-Varying Ultrasonic Testing
                 Datasets''}",
  journal =      j-COMPUT-SCI-ENG,
  volume =       "22",
  number =       "1",
  pages =        "4--4",
  month =        jan # "\slash " # feb,
  year =         "2020",
  CODEN =        "CSENFA",
  DOI =          "https://doi.org/10.1109/MCSE.2019.2948481",
  ISSN =         "1521-9615 (print), 1558-366X (electronic)",
  ISSN-L =       "1521-9615",
  bibdate =      "Thu Mar 05 14:46:04 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "See \cite{Deng:2019:CBV}.",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing in Science and Engineering",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
  keywords =     "Acoustics; Biographies; Inspection; Rendering
                 (computer graphics); Testing",
}
@Article{Diener:2020:HCO,
  author =       "Matthias Diener and Laxmikant V. Kale and Daniel J.
                 Bodony",
  title =        "Heterogeneous computing with {OpenMP} and {Hydra}",
  journal =      j-CCPE,
  volume =       "32",
  number =       "20",
  pages =        "e5728:1--e5728:??",
  day =          "25",
  month =        oct,
  year =         "2020",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.5728",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Wed Mar 31 07:52:20 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Concurr. Comput.",
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "07 March 2020",
}
@Article{Eichenberger:2020:HCG,
  author =       "A. E. Eichenberger and G.-T. Bercea and A. Bataev and
                 L. Grinberg and J. K. O'Brien",
  title =        "Hybrid {CPU\slash GPU} tasks optimized for concurrency
                 in {OpenMP}",
  journal =      j-IBM-JRD,
  volume =       "64",
  number =       "3/4",
  pages =        "13:1--13:14",
  month =        may # "\slash " # jul,
  year =         "2020",
  CODEN =        "IBMJAE",
  DOI =          "https://doi.org/10.1147/JRD.2019.2960245",
  ISSN =         "0018-8646 (print), 2151-8556 (electronic)",
  ISSN-L =       "0018-8646",
  bibdate =      "Wed Jun 3 18:35:26 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ibmjrd.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/super.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IBM Journal of Research and Development",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520",
}
@Article{Eichstadt:2020:CSM,
  author =       "Jan Eichst{\"a}dt and Martin Vymazal and David Moxey
                 and Joaquim Peir{\'o}",
  title =        "A comparison of the shared-memory parallel programming
                 models {{\em OpenMP}}, {{\em OpenACC}} and {{\em
                 Kokkos}} in the context of implicit solvers for
                 high-order {FEM}",
  journal =      j-COMP-PHYS-COMM,
  volume =       "255",
  number =       "??",
  pages =        "Article 107245",
  month =        oct,
  year =         "2020",
  CODEN =        "CPHCBZ",
  DOI =          "https://doi.org/10.1016/j.cpc.2020.107245",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Fri Jun 19 07:19:50 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465520300746",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Elis:2020:QNG,
  author =       "Bengisu Elis and Dai Yang and Olga Pearce and Kathryn
                 Mohror and Martin Schulz",
  title =        "{QMPI}: a next generation {MPI} profiling interface
                 for modern {HPC} platforms",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "96",
  number =       "??",
  pages =        "Article 102635",
  month =        aug,
  year =         "2020",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2020.102635",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Mar 29 11:36:01 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819120300284",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Fan:2020:ALC,
  author =       "Q. Fan and D. J. Lilja and S. S. Sapatnekar",
  title =        "Adaptive-Length Coding of Image Data for Low-Cost
                 Approximate Storage",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "69",
  number =       "2",
  pages =        "239--252",
  month =        feb,
  year =         "2020",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2019.2946795",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Wed Jan 22 06:44:09 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/datacompression.bib;
                 http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
  keywords =     "Adaptive-length coding; approximate storage; Discrete
                 cosine transforms; error-resilience; Huffman coding;
                 Image coding; Reliability; Resilience; Transform
                 coding",
}
@Article{Ferreira:2020:HMM,
  author =       "Kurt Ferreira and Ryan E. Grant and Michael J.
                 Levenhagen and Scott Levy and Taylor Groves",
  title =        "Hardware {MPI} message matching: Insights into {MPI}
                 matching behavior to inform design",
  journal =      j-CCPE,
  volume =       "32",
  number =       "3",
  pages =        "e5150:1--e5150:??",
  day =          "10",
  month =        feb,
  year =         "2020",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.5150",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Wed Mar 31 07:52:13 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Concurr. Comput.",
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "27 February 2019",
}
@Article{Gao:2020:MES,
author = "T. Gao and Y. Guo and B. Zhang and P. Cicotti and Y.
Lu and P. Balaji and M. Taufer",
title = "Memory-Efficient and Skew-Tolerant {MapReduce} Over
{MPI} for Supercomputing Systems",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "31",
number = "12",
pages = "2734--2748",
month = dec,
year = "2020",
CODEN = "ITDSEO",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Sat Aug 15 14:52:38 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/super.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
}
@Article{Gawande:2020:SDL,
author = "Nitin A. Gawande and Jeff A. Daily and Charles Siegel
and Nathan R. Tallent and Abhinav Vishnu",
title = "Scaling Deep Learning workloads: {NVIDIA DGX-1\slash
Pascal} and {Intel Knights Landing}",
journal = j-FUT-GEN-COMP-SYS,
volume = "108",
number = "??",
pages = "1162--1172",
month = jul,
year = "2020",
CODEN = "FGSEVI",
DOI = "https://doi.org/10.1016/j.future.2018.04.073",
ISSN = "0167-739X (print), 1872-7115 (electronic)",
ISSN-L = "0167-739X",
bibdate = "Fri Jun 19 07:44:16 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167739X17318599",
acknowledgement = ack-nhfb,
fjournal = "Future Generation Computer Systems",
journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Ghazimirsaeed:2020:CAM,
author = "S. Mahdieh Ghazimirsaeed and Seyed H. Mirsadeghi and
Ahmad Afsahi",
title = "Communication-aware message matching in {MPI}",
journal = j-CCPE,
volume = "32",
number = "3",
pages = "e4862:1--e4862:??",
day = "10",
month = feb,
year = "2020",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.4862",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Wed Mar 31 07:52:13 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
ajournal = "Concurr. Comput.",
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "21 September 2018",
}
@Article{Gonzalez-Dominguez:2020:CJA,
author = "Jorge Gonz{\'a}lez-Dom{\'\i}nguez and Roberto R.
Exp{\'o}sito and Ver{\'o}nica Bol{\'o}n-Canedo",
title = "{CUDA-JMI}: Acceleration of feature selection on
heterogeneous systems",
journal = j-FUT-GEN-COMP-SYS,
volume = "102",
number = "??",
pages = "426--436",
month = jan,
year = "2020",
CODEN = "FGSEVI",
DOI = "https://doi.org/10.1016/j.future.2019.08.031",
ISSN = "0167-739X (print), 1872-7115 (electronic)",
ISSN-L = "0167-739X",
bibdate = "Mon Feb 10 12:55:04 MST 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167739X19312968",
acknowledgement = ack-nhfb,
fjournal = "Future Generation Computer Systems",
journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Gutierrez:2020:MAP,
author = "Samuel K. Guti{\'e}rrez and Dorian C. Arnold and Kei
Davis and Patrick McCormick",
title = "On the memory attribution problem: a solution and case
study using {MPI}",
journal = j-CCPE,
volume = "32",
number = "3",
pages = "e5159:1--e5159:??",
day = "10",
month = feb,
year = "2020",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.5159",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Wed Mar 31 07:52:13 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
ajournal = "Concurr. Comput.",
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "04 February 2019",
}
@Article{Hagedorn:2020:AHP,
author = "Bastian Hagedorn and Johannes Lenfers and Thomas
K{\oe}hler and Xueying Qin and Sergei Gorlatch and
Michel Steuwer",
title = "Achieving high-performance the functional way: a
functional pearl on expressing high-performance
optimizations as rewrite strategies",
journal = j-PACMPL,
volume = "4",
number = "ICFP",
pages = "92:1--92:29",
month = aug,
year = "2020",
DOI = "https://doi.org/10.1145/3408974",
bibdate = "Tue Mar 30 08:10:48 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pacmpl.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://dl.acm.org/doi/10.1145/3408974",
abstract = "Optimizing programs to run efficiently on modern
parallel hardware is hard but crucial for many
applications. The predominantly used imperative
languages --- like C or OpenCL --- force the programmer
to intertwine the code describing functionality and
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "92",
fjournal = "Proceedings of the ACM on Programming Languages",
journal-URL = "https://pacmpl.acm.org/",
}
@Article{Hashmi:2020:FXZ,
author = "Jahanzeb Maqbool Hashmi and Ching-Hsiang Chu and
Sourav Chakraborty and Mohammadreza Bayatpour and Hari
Subramoni and Dhabaleswar K. Panda",
title = "{FALCON-X}: Zero-copy {MPI} derived datatype
processing on modern {CPU} and {GPU} architectures",
journal = j-J-PAR-DIST-COMP,
volume = "144",
number = "??",
pages = "1--13",
month = oct,
year = "2020",
CODEN = "JPDCER",
DOI = "https://doi.org/10.1016/j.jpdc.2020.05.008",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Wed May 26 16:11:02 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0743731520302872",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{He:2020:SMO,
author = "Feng He and Xiaoshe Dong and Nianjun Zou and Weiguo Wu
and Xingjun Zhang",
title = "Structured mesh-oriented framework design and
optimization for a coarse-grained parallel {CFD} solver
based on hybrid {MPI\slash OpenMP} programming",
journal = j-J-SUPERCOMPUTING,
volume = "76",
number = "4",
pages = "2815--2841",
month = apr,
year = "2020",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-019-03063-6",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Sat Jul 25 07:17:55 MDT 2020",
bibsource = "http://link.springer.com/journal/11227/76/4;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Huang:2020:POL,
author = "Ming Hsiang Huang and Wuu Yang",
title = "{PFACC}: an {OpenACC}-like programming model for
irregular nested parallelism",
journal = j-SPE,
volume = "50",
number = "10",
pages = "1877--1904",
month = oct,
year = "2020",
CODEN = "SPEXBL",
DOI = "https://doi.org/10.1002/spe.2868",
ISSN = "0038-0644 (print), 1097-024X (electronic)",
ISSN-L = "0038-0644",
bibdate = "Fri Feb 26 08:59:23 MST 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/spe.bib",
acknowledgement = ack-nhfb,
ajournal = "Softw. Pract. Exp.",
fjournal = "Software --- Practice and Experience",
journal-URL = "http://onlinelibrary.wiley.com/journal/10.1002/(ISSN)1097-024X",
onlinedate = "09 July 2020",
}
@Article{Jaksic:2020:HPF,
author = "Zoran Jak{\v{s}}i{\'c} and Nicola Cadenelli and David
Buchaca Prats and Jord{\`a} Polo and Josep Llu{\'{\i}}s
Berral Garcia and David Carrera Perez",
title = "A highly parameterizable framework for Conditional
Restricted {Boltzmann} Machine based workloads
accelerated with {FPGAs} and {OpenCL}",
journal = j-FUT-GEN-COMP-SYS,
volume = "104",
number = "??",
pages = "201--211",
month = mar,
year = "2020",
CODEN = "FGSEVI",
DOI = "https://doi.org/10.1016/j.future.2019.10.025",
ISSN = "0167-739X (print), 1872-7115 (electronic)",
ISSN-L = "0167-739X",
bibdate = "Mon Feb 10 12:55:06 MST 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167739X19313676",
acknowledgement = ack-nhfb,
fjournal = "Future Generation Computer Systems",
journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Kang:2020:IMC,
author = "Q. Kang and S. Lee and K. Hou and R. Ross and A.
Agrawal and A. Choudhary and W. Liao",
title = "Improving {MPI} Collective {I/O} for High Volume
Non-Contiguous Requests With Intra-Node Aggregation",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "31",
number = "11",
pages = "2682--2695",
month = nov,
year = "2020",
CODEN = "ITDSEO",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Sat Aug 15 14:52:38 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
}
@Article{Kirkham:2020:FEM,
author = "Jake Kirkham and Tyler Sorensen and Esin Tureci and
Margaret Martonosi",
title = "Foundations of empirical memory consistency testing",
journal = j-PACMPL,
volume = "4",
number = "OOPSLA",
pages = "226:1--226:29",
month = nov,
year = "2020",
DOI = "https://doi.org/10.1145/3428294",
bibdate = "Tue Mar 30 08:10:50 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pacmpl.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://dl.acm.org/doi/10.1145/3428294",
abstract = "Modern memory consistency models are complex, and it
is difficult to reason about the relaxed behaviors that
current systems allow. Programming languages, such as C
and OpenCL, offer a memory model interface that
developers can use to safely write \ldots{}",
acknowledgement = ack-nhfb,
articleno = "226",
fjournal = "Proceedings of the ACM on Programming Languages",
journal-URL = "https://pacmpl.acm.org/",
}
@Article{Klinkenberg:2020:CRL,
author = "Jannis Klinkenberg and Philipp Samfass and Michael
Bader and Christian Terboven and Matthias S.
M{\"u}ller",
title = "{CHAMELEON}: Reactive Load Balancing for Hybrid {MPI +
OpenMP} Task-Parallel Applications",
journal = j-J-PAR-DIST-COMP,
volume = "138",
number = "??",
pages = "55--64",
month = apr,
year = "2020",
CODEN = "JPDCER",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Wed Mar 18 09:26:11 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0743731519305180",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Korch:2020:ILE,
author = "Matthias Korch and Tim Werner",
title = "Improving locality of explicit one-step methods on
{GPUs} by tiling across stages and time steps",
journal = j-FUT-GEN-COMP-SYS,
volume = "102",
number = "??",
pages = "889--901",
month = jan,
year = "2020",
CODEN = "FGSEVI",
DOI = "https://doi.org/10.1016/j.future.2019.07.075",
ISSN = "0167-739X (print), 1872-7115 (electronic)",
ISSN-L = "0167-739X",
bibdate = "Mon Feb 10 12:55:04 MST 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167739X19307186",
acknowledgement = ack-nhfb,
fjournal = "Future Generation Computer Systems",
journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Larrea:2020:EPM,
author = "Ver{\'o}nica G. Vergara Larrea and Reuben D. Budiardja
and Rahulkumar Gayatri and Christopher Daley and Oscar
Hernandez and Wayne Joubert",
title = "Experiences in porting mini-applications to {OpenACC}
and {OpenMP} on heterogeneous systems",
journal = j-CCPE,
volume = "32",
number = "20",
pages = "e5780:1--e5780:??",
day = "25",
month = oct,
year = "2020",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.5780",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Wed Mar 31 07:52:20 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
ajournal = "Concurr. Comput.",
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "24 April 2020",
}
@Article{Levy:2020:UVA,
author = "Scott Levy and Kurt B. Ferreira and Patrick Widener",
title = "The unexpected virtue of almost: Exploiting {MPI}
collective operations to approximately coordinate
checkpoints",
journal = j-CCPE,
volume = "32",
number = "3",
pages = "e4890:1--e4890:??",
day = "10",
month = feb,
year = "2020",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.4890",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Wed Mar 31 07:52:13 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
ajournal = "Concurr. Comput.",
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "09 September 2018",
}
@Article{Li:2020:OOS,
author = "Ting Li and Lawrence V. Stanislawski and Tyler
Brockmeyer and Shaowen Wang and Ethan Shavers",
title = "\pkg{OpenCLC}: an open-source software tool for
similarity assessment of linear hydrographic features",
journal = j-SOFTWAREX,
volume = "11",
number = "??",
pages = "Article 100401",
month = jan # "\slash " # jun,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1016/j.softx.2020.100401",
ISSN = "2352-7110",
ISSN-L = "2352-7110",
bibdate = "Fri Apr 9 16:04:39 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/softwarex.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S2352711018302747",
acknowledgement = ack-nhfb,
fjournal = "SoftwareX",
journal-URL = "https://www.sciencedirect.com/journal/softwarex/issues",
}
@Article{Li:2020:SLF,
author = "Qinbo Li and Nima Khademi Kalantari",
title = "Synthesizing light field from a single image with
variable {MPI} and two network fusion",
journal = j-TOG,
volume = "39",
number = "6",
pages = "229:1--229:10",
month = nov,
year = "2020",
CODEN = "ATGRDF",
DOI = "https://doi.org/10.1145/3414685.3417785",
ISSN = "0730-0301 (print), 1557-7368 (electronic)",
ISSN-L = "0730-0301",
bibdate = "Sun Mar 28 08:21:45 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/tog.bib",
URL = "https://dl.acm.org/doi/10.1145/3414685.3417785",
abstract = "We propose a learning-based approach to synthesize a
light field with a small baseline from a single image.
We synthesize the novel view images by first using a
convolutional neural network (CNN) to promote the input
image into a layered representation \ldots{}",
acknowledgement = ack-nhfb,
articleno = "229",
fjournal = "ACM Transactions on Graphics",
journal-URL = "https://dl.acm.org/loi/tog",
}
@Article{Liang:2020:AMD,
author = "Jianguo Liang and Rong Hua and Hao Zhang and Wenqiang
Zhu and You Fu",
title = "Accelerated molecular dynamics simulation of Silicon
Crystals on {TaihuLight} using {OpenACC}",
journal = j-PARALLEL-COMPUTING,
volume = "99",
number = "??",
pages = "Article 102667",
month = nov,
year = "2020",
CODEN = "PACOEJ",
DOI = "https://doi.org/10.1016/j.parco.2020.102667",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Mon Mar 29 11:36:02 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819120300600",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Liao:2020:DCS,
author = "Xiaofei Liao and Long Zheng and Binsheng Zhang and Yu
Zhang and Hai Jin and Xuanhua Shi and Yi Lin",
title = "Dynamic cluster strategy for hierarchical
rollback-recovery protocols in {MPI} {HPC}
applications",
journal = j-CCPE,
volume = "32",
number = "3",
pages = "e4173:1--e4173:??",
day = "10",
month = feb,
year = "2020",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.4173",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Wed Mar 31 07:52:13 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
ajournal = "Concurr. Comput.",
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "24 May 2017",
}
@Article{Lin:2020:EAM,
author = "Bo Lin and Chijie Zhuang and Zhenning Cai and Rong
Zeng and Weizhu Bao",
title = "An efficient and accurate {MPI}-based parallel
simulator for streamer discharges in three dimensions",
journal = j-J-COMPUT-PHYS,
volume = "401",
number = "??",
pages = "Article 109026",
day = "15",
month = jan,
year = "2020",
CODEN = "JCTPAH",
ISSN = "0021-9991 (print), 1090-2716 (electronic)",
ISSN-L = "0021-9991",
bibdate = "Mon Mar 9 18:28:21 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jcomputphys2020.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0021999119307326",
acknowledgement = ack-nhfb,
fjournal = "Journal of Computational Physics",
journal-URL = "http://www.sciencedirect.com/science/journal/00219991",
}
@Article{Lin:2020:GTD,
author = "Huanxin Lin and Cho-Li Wang",
title = "On-{GPU} thread-data remapping for nested branch
divergence",
journal = j-J-PAR-DIST-COMP,
volume = "139",
number = "??",
pages = "75--86",
month = may,
year = "2020",
CODEN = "JPDCER",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Wed Mar 18 09:26:12 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0743731518308967",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Losada:2020:FTM,
author = "Nuria Losada and Patricia Gonz{\'a}lez and
Mar{\'{\i}}a J. Mart{\'{\i}}n and George Bosilca and
Aur{\'e}lien Bouteiller and Keita Teranishi",
title = "Fault tolerance of {MPI} applications in exascale
systems: the {ULFM} solution",
journal = j-FUT-GEN-COMP-SYS,
volume = "106",
number = "??",
pages = "467--481",
month = may,
year = "2020",
CODEN = "FGSEVI",
DOI = "https://doi.org/10.1016/j.future.2020.01.026",
ISSN = "0167-739X (print), 1872-7115 (electronic)",
ISSN-L = "0167-739X",
bibdate = "Fri Jun 19 07:44:13 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167739X1930860X",
acknowledgement = ack-nhfb,
fjournal = "Future Generation Computer Systems",
journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Lu:2020:GQO,
author = "Q. Lu and J. Yao and H. Guan and P. Gao",
title = "{gQoS}: a {QoS}-Oriented {GPU} Virtualization with
Adaptive Capacity Sharing",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "31",
number = "4",
pages = "843--855",
month = apr,
year = "2020",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2019.2948753",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Wed Jan 22 06:09:50 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
keywords = "Cloud computing; cloud computing; GPU virtualization;
Graphics processing units; Hardware; QoS control;
Quality of service; Resource management; resource
scheduling; Virtual machining; Virtualization",
}
@Article{Mantas:2020:HOC,
author = "Jos{\'e} M. Mantas and Francesco Vecil",
title = "Hybrid {OpenMP--CUDA} parallel implementation of a
deterministic solver for ultrashort {DG-MOSFETs}",
journal = j-IJHPCA,
volume = "34",
number = "1",
pages = "81--102",
day = "1",
month = jan,
year = "2020",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342019879985",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Sat Jul 25 09:38:31 MDT 2020",
bibsource = "http://hpc.sagepub.com/;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://journals.sagepub.com/doi/full/10.1177/1094342019879985",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
}
@Article{Mena:2020:GAS,
author = "Hermann Mena and Lena-Maria Pfurtscheller and Tony
Stillfjord",
title = "{GPU} acceleration of splitting schemes applied to
differential matrix equations",
journal = j-NUMER-ALGORITHMS,
volume = "83",
number = "1",
pages = "395--419",
month = jan,
year = "2020",
CODEN = "NUALEG",
DOI = "https://doi.org/10.1007/s11075-019-00687-w",
ISSN = "1017-1398 (print), 1572-9265 (electronic)",
ISSN-L = "1017-1398",
bibdate = "Wed Jan 22 08:40:22 MST 2020",
bibsource = "http://link.springer.com/journal/11075/83/1;
http://www.math.utah.edu/pub/tex/bib/numeralgorithms.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://link.springer.com/content/pdf/10.1007/s11075-019-00687-w.pdf",
acknowledgement = ack-nhfb,
fjournal = "Numerical Algorithms",
journal-URL = "http://link.springer.com/journal/11075",
}
@Article{Mofrad:2020:GNA,
author = "Mohammad Hasanzadeh Mofrad and Rami Melhem and Yousuf
Ahmad and Mohammad Hammoud",
title = "{Graphite}: a {NUMA}-aware {HPC} system for graph
analytics based on a new {MPI * X} parallelism model",
journal = j-PROC-VLDB-ENDOWMENT,
volume = "13",
number = "6",
pages = "783--797",
month = feb,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.14778/3380750.3380751",
ISSN = "2150-8097",
bibdate = "Thu Apr 2 10:51:28 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/vldbe.bib",
URL = "https://dl.acm.org/doi/abs/10.14778/3380750.3380751",
abstract = "In this paper, we propose a new parallelism model
denoted as MPI * X and suggest a linear algebra-based
graph analytics system, namely, Graphite, which
effectively employs it. MPI * X promotes thread-based
partitioning to distribute computation and \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "Proceedings of the VLDB Endowment",
journal-URL = "https://dl.acm.org/loi/pvldb",
}
@Article{Mu:2020:OOB,
author = "Jiandong Mu and Wei Zhang and Hao Liang and Sharad
Sinha",
title = "Optimizing {OpenCL}-Based {CNN} Design on {FPGA} with
Comprehensive Design Space Exploration and
Collaborative Performance Modeling",
journal = j-TRETS,
volume = "13",
number = "3",
pages = "13:1--13:28",
month = sep,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1145/3397514",
ISSN = "1936-7406 (print), 1936-7414 (electronic)",
ISSN-L = "1936-7406",
bibdate = "Sat Sep 5 18:51:36 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/trets.bib",
URL = "https://dl.acm.org/doi/10.1145/3397514",
abstract = "Recent success in applying convolutional neural
networks (CNNs) to object detection and classification
has sparked great interest in accelerating CNNs using
hardware like field-programmable gate arrays (FPGAs).
However, finding an efficient FPGA design \ldots{}",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Reconfigurable Technology and
Systems (TRETS)",
journal-URL = "https://dl.acm.org/loi/trets",
}
@Article{Nandal:2020:NSG,
author = "P. Nandal and R. P. Sharma",
title = "Numerical simulation on {GPUs} with {CUDA} to study
nonlinear dynamics of whistler wave and its turbulent
spectrum in radiation belts",
journal = j-COMP-PHYS-COMM,
volume = "254",
number = "??",
pages = "Article 107214",
month = sep,
year = "2020",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/j.cpc.2020.107214",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Fri Jun 19 07:19:49 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465520300497",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Naranjo:2020:ASC,
author = "Diana M. Naranjo and Sebasti{\'a}n Risco and Carlos de
Alfonso and Alfonso P{\'e}rez and Ignacio Blanquer and
Germ{\'a}n Molt{\'o}",
title = "Accelerated serverless computing based on {GPU}
virtualization",
journal = j-J-PAR-DIST-COMP,
volume = "139",
number = "??",
pages = "32--42",
month = may,
year = "2020",
CODEN = "JPDCER",
DOI = "https://doi.org/10.1016/j.jpdc.2020.01.004",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Wed Mar 18 09:26:12 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0743731519303533",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Petrovic:2020:BSH,
author = "Filip Petrovi{\v{c}} and David St{\v{r}}el{\'a}k and
Jana Hozzov{\'a} and Jaroslav Ol'ha and Richard
Trembeck{\'y} and Siegfried Benkner and
Ji{\v{r}}{\'{\i}} Filipovi{\v{c}}",
title = "A benchmark set of highly-efficient {CUDA} and
{OpenCL} kernels and its dynamic autotuning with
{Kernel Tuning Toolkit}",
journal = j-FUT-GEN-COMP-SYS,
volume = "108",
number = "??",
pages = "161--177",
month = jul,
year = "2020",
CODEN = "FGSEVI",
DOI = "https://doi.org/10.1016/j.future.2020.02.069",
ISSN = "0167-739X (print), 1872-7115 (electronic)",
ISSN-L = "0167-739X",
bibdate = "Fri Jun 19 07:44:16 MDT 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167739X19327360",
acknowledgement = ack-nhfb,
fjournal = "Future Generation Computer Systems",
journal-URL = "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{Prades:2020:MRU,
author = "Javier Prades and Baldomero Imbern{\'o}n and Carlos
Rea{\~n}o and Jorge Pe{\~n}a-Garc{\'\i}a and Jose Pedro
Cer{\'o}n-Carrasco and Federico Silla and Horacio
P{\'e}rez-S{\'a}nchez",
title = "Maximizing resource usage in multifold molecular
dynamics with {rCUDA}",
journal = j-IJHPCA,
volume = "34",
number = "1",
pages = "5--19",
day = "1",
month = jan,
year = "2020",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342019857131",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Sat Jul 25 09:38:31 MDT 2020",
bibsource = "http://hpc.sagepub.com/;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://journals.sagepub.com/doi/full/10.1177/1094342019857131",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
}
@Article{Rasch:2020:DHL,
author = "Ari Rasch and Julian Bigge and Martin Wrodarczyk and
Richard Schulze and Sergei Gorlatch",
title = "{dOCAL}: high-level distributed programming with
{OpenCL} and {CUDA}",
journal = j-J-SUPERCOMPUTING,
volume = "76",
number = "7",
pages = "5117--5138",
month = jul,
year = "2020",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-019-02829-2",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Sat Jul 25 07:17:59 MDT 2020",
bibsource = "http://link.springer.com/journal/11227/76/7;
http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Reis:2020:CMC,
author = "Lu{\'\i}s Reis and Jo{\~a}o Bispo and Jo{\~a}o M. P.
Cardoso",
title = "Compilation of {MATLAB} computations to {CPU\slash
GPU} via {C\slash OpenCL} generation",
journal = j-CCPE,
volume = "32",
number = "22",
pages = "e5854:1--e5854:??",
day = "25",
month = nov,
year = "2020",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.5854",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Wed Mar 31 07:52:22 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/matlab.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
ajournal = "Concurr. Comput.",
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "01 June 2020",
}
@Article{Renaud:2020:IMS,
author = "Nicolas Renaud and Yong Jung and Vasant Honavar and
Cunliang Geng and Alexandre M. J. J. Bonvin and Li C.
Xue",
title = "\pkg{iScore}: an {MPI} supported software for ranking
protein-protein docking models based on a random walk
graph kernel and support vector machines",
journal = j-SOFTWAREX,
volume = "11",
number = "??",
pages = "Article 100462",
month = jan # "\slash " # jun,
year = "2020",
CODEN = "????",
DOI = "https://doi.org/10.1016/j.softx.2020.100462",
ISSN = "2352-7110",
ISSN-L = "2352-7110",
bibdate = "Fri Apr 9 16:04:39 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/softwarex.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S2352711019303061",
acknowledgement = ack-nhfb,
fjournal = "SoftwareX",
journal-URL = "https://www.sciencedirect.com/journal/softwarex/issues",
}
@Article{Russek:2020:SLC,
author = "Pawe{\l} Russek and Ernest Jamro and Agnieszka
Dabrowska-Boruch and Kazimierz Wiatr",
title = "A study of the loops control for reconfigurable
computing with {OpenCL} in the {LABS} local search
problem",
journal = j-IJHPCA,
volume = "34",
number = "1",
pages = "103--114",
day = "1",
month = jan,
year = "2020",
CODEN = "IHPCFL",
DOI = "https://doi.org/10.1177/1094342019868515",
ISSN = "1094-3420 (print), 1741-2846 (electronic)",
ISSN-L = "1094-3420",
bibdate = "Sat Jul 25 09:38:31 MDT 2020",
bibsource = "http://hpc.sagepub.com/;
http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://journals.sagepub.com/doi/full/10.1177/1094342019868515",
acknowledgement = ack-nhfb,
fjournal = "International Journal of High Performance Computing
Applications",
}
@Article{Salinas:2020:FEI,
  author =       "{\'A}lvaro Salinas and Claudio Torres and Orlando
                 Ayala",
  title =        "A fast and efficient integration of boundary
                 conditions into a unified {CUDA} Kernel for a shallow
                 water solver lattice {Boltzmann} Method",
  journal =      j-COMP-PHYS-COMM,
  volume =       "249",
  number =       "??",
  pages =        "Article 107009",
  month =        apr,
  year =         "2020",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Mon Mar 2 13:57:36 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465519303443",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Samfass:2020:LTO,
  author =       "Philipp Samfass and Tobias Weinzierl and Dominic E.
                 Charrier and Michael Bader",
  title =        "Lightweight task offloading exploiting {MPI} wait
                 times for parallel adaptive mesh refinement",
  journal =      j-CCPE,
  volume =       "32",
  number =       "24",
  pages =        "e5916:1--e5916:??",
  day =          "25",
  month =        dec,
  year =         "2020",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.5916",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Wed Mar 31 07:52:23 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Concurr. Comput.",
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "09 July 2020",
}
@Article{Shekofteh:2020:CEC,
  author =       "S.-Kazen Shekofteh and Hamid Noori and Mahmoud
                 Naghibzadeh and Holger Fr{\"o}ning and Hadi Sadog
                 Yazdi",
  title =        "{cCUDA}: Effective Co-Scheduling of Concurrent Kernels
                 on {GPUs}",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "31",
  number =       "4",
  pages =        "766--778",
  month =        apr,
  year =         "2020",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2944602",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed Jan 22 06:09:50 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "Analytical models; Benchmark testing; concurrent
                 kernel execution; Graphics processing units; Hardware;
                 Kernel; resource management; Scheduling; scheduling;
                 stream",
}
@Article{Shen:2020:GPC,
  author =       "Qi Shen and Craig Sharp and Richard Davison and Gary
                 Ushaw and Rajiv Ranjan and Albert Y. Zomaya and Graham
                 Morgan",
  title =        "A general purpose contention manager for software
                 transactions on the {GPU}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "139",
  number =       "??",
  pages =        "1--17",
  month =        may,
  year =         "2020",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2019.12.018",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Wed Mar 18 09:26:12 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731519301376",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Silla:2020:IPP,
  author =       "Federico Silla and Javier Prades and Elvira Baydal and
                 Carlos Rea{\~n}o",
  title =        "Improving the performance of physics applications in
                 atom-based clusters with {rCUDA}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "137",
  number =       "??",
  pages =        "160--178",
  month =        mar,
  year =         "2020",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Wed Mar 18 09:26:11 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731519304034",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Skjellum:2020:FSI,
  author =       "Anthony Skjellum and Purushotham V. Bangalore and Ryan
                 E. Grant",
  title =        "Foreword to the Special Issue of the {Workshop on
                 Exascale MPI (ExaMPI 2017)}",
  journal =      j-CCPE,
  volume =       "32",
  number =       "3",
  pages =        "e5459:1--e5459:??",
  day =          "10",
  month =        feb,
  year =         "2020",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.5459",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Wed Mar 31 07:52:13 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Concurr. Comput.",
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "18 July 2019",
}
@Article{Spiliotis:2020:PII,
  author =       "Iraklis M. Spiliotis and Michael P. Bekakos and
                 Yiannis S. Boutalis",
  title =        "Parallel implementation of the {Image Block
                 Representation} using {OpenMP}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "137",
  number =       "??",
  pages =        "134--147",
  month =        mar,
  year =         "2020",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Wed Mar 18 09:26:11 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731519307622",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Stpiczynski:2020:ALB,
  author =       "Przemys{\l}aw Stpiczy{\'n}ski",
  title =        "Algorithmic and language-based optimization of
                 {Marsa-LFIB4} pseudorandom number generator using
                 {OpenMP}, {OpenACC} and {CUDA}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "137",
  number =       "??",
  pages =        "238--245",
  month =        mar,
  year =         "2020",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2019.12.004",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Wed Mar 18 09:26:11 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/prng.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731519304885",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Sun:2020:RTS,
  author =       "J. Sun and N. Guan and F. Li and H. Gao and C. Shi and
                 W. Yi",
  title =        "Real-Time Scheduling and Analysis of {OpenMP} {DAG}
                 Tasks Supporting Nested Parallelism",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "69",
  number =       "9",
  pages =        "1335--1348",
  month =        sep,
  year =         "2020",
  CODEN =        "ITCOB4",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Wed Aug 12 14:58:16 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Tiotto:2020:OCO,
  author =       "E. Tiotto and B. Mahjour and W. Tsang and X. Xue and
                 T. Islam and W. Chen",
  title =        "{OpenMP 4.5} compiler optimization for {GPU}
                 offloading",
  journal =      j-IBM-JRD,
  volume =       "64",
  number =       "3/4",
  pages =        "14:1--14:11",
  month =        may # "\slash " # jul,
  year =         "2020",
  CODEN =        "IBMJAE",
  DOI =          "https://doi.org/10.1147/JRD.2019.2962428",
  ISSN =         "0018-8646 (print), 2151-8556 (electronic)",
  ISSN-L =       "0018-8646",
  bibdate =      "Wed Jun 3 18:35:26 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ibmjrd.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/super.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IBM Journal of Research and Development",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5288520",
}
@Article{Traff:2020:SIS,
  author =       "Jesper Larsson Tr{\"a}ff and Torsten Hoefler",
  title =        "Special issue: Selected papers from {EuroMPI 2019}",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "99",
  number =       "??",
  pages =        "Article 102695",
  month =        nov,
  year =         "2020",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2020.102695",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Mar 29 11:36:02 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819120300855",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Tsiolakis:2020:NPG,
  author =       "Vasileios Tsiolakis and Matteo Giacomini and Ruben
                 Sevilla and Carsten Othmer and Antonio Huerta",
  title =        "Nonintrusive proper generalised decomposition for
                 parametrised incompressible flow problems in
                 {OpenFOAM}",
  journal =      j-COMP-PHYS-COMM,
  volume =       "249",
  number =       "??",
  pages =        "Article 107013",
  month =        apr,
  year =         "2020",
  CODEN =        "CPHCBZ",
  ISSN =         "0010-4655 (print), 1879-2944 (electronic)",
  ISSN-L =       "0010-4655",
  bibdate =      "Mon Mar 2 13:57:36 MST 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0010465519303479",
  acknowledgement = ack-nhfb,
  fjournal =     "Computer Physics Communications",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Turchetto:2020:GDS,
  author =       "M. Turchetto and A. D. Pal{\`u} and R. Vacondio",
  title =        "A General Design for a Scalable {MPI-GPU}
                 Multi-Resolution {2D} Numerical Solver",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "31",
  number =       "5",
  pages =        "1036--1047",
  month =        may,
  year =         "2020",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2961909",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Feb 20 10:08:58 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
  keywords =     "AMR; CUDA; dynamic load balancing; hilbert space
                 filling curves; MPI; multi-GPU; multi-resolution grid;
                 shallow water equations (SWE)",
}
@Article{Valero-Lara:2020:SFA,
  author =       "Pedro Valero-Lara and Sandra Catal{\'a}n and Xavier
                 Martorell and Tetsuzo Usui and Jes{\'u}s Labarta",
  title =        "{sLASs}: a fully automatic auto-tuned linear algebra
                 library based on {OpenMP} extensions implemented in
                 {OmpSs} ({LASs} Library)",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "138",
  number =       "??",
  pages =        "153--171",
  month =        apr,
  year =         "2020",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Wed Mar 18 09:26:11 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731519303417",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Wang:2020:EPE,
  author =       "X. Wang and X. Qian and A. Knoll and K. Huang",
  title =        "Efficient Performance Estimation and Work-Group Size
                 Pruning for {OpenCL} Kernels on {GPUs}",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "31",
  number =       "5",
  pages =        "1089--1106",
  month =        may,
  year =         "2020",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2958343",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Thu Feb 20 10:08:58 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
  keywords =     "Analytical models; Estimation; GPU; Graphics
                 processing units; Hardware; Kernel; Measurement;
                 OpenCL; performance estimation; performance tuning;
                 Runtime; work-group size",
}
@Article{Weng:2020:CMS,
  author =       "Tien-Hsiung Weng and Kuan-Ching Li and Zhiliu Yang and
                 Chen Liu",
  title =        "On the code modernization of shared sampling alpha
                 matting with {OpenMP}",
  journal =      j-FUT-GEN-COMP-SYS,
  volume =       "107",
  number =       "??",
  pages =        "177--191",
  month =        jun,
  year =         "2020",
  CODEN =        "FGSEVI",
  DOI =          "https://doi.org/10.1016/j.future.2019.12.012",
  ISSN =         "0167-739X (print), 1872-7115 (electronic)",
  ISSN-L =       "0167-739X",
  bibdate =      "Fri Jun 19 07:44:14 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167739X19314116",
  acknowledgement = ack-nhfb,
  fjournal =     "Future Generation Computer Systems",
  journal-URL =  "http://www.sciencedirect.com/science/journal/0167739X",
}
@Article{White:2020:OPP,
  author =       "Sam White and Laxmikant V. Kale",
  title =        "Optimizing point-to-point communication between
                 adaptive {MPI} endpoints in shared memory",
  journal =      j-CCPE,
  volume =       "32",
  number =       "3",
  pages =        "e4467:1--e4467:??",
  day =          "10",
  month =        feb,
  year =         "2020",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.4467",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Wed Mar 31 07:52:13 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Concurr. Comput.",
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "12 March 2018",
}
@Article{Yu:2020:EPW,
  author =       "C. Yu and S. Tsao",
  title =        "Efficient and Portable Workgroup Size Tuning",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "31",
  number =       "2",
  pages =        "455--469",
  month =        feb,
  year =         "2020",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2937295",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed Jan 22 06:09:50 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "automatic performance tuning; Computational modeling;
                 Graphics processing units; Hardware; Indexes; Kernel;
                 microbenchmarking; OpenCL; Performance evaluation;
                 Tuning; workgroup size selection",
}
@Article{Zarebavani:2020:CCB,
  author =       "B. Zarebavani and F. Jafarinejad and M. Hashemi and S.
                 Salehkaleybar",
  title =        "{cuPC}: {CUDA}-Based Parallel {PC} Algorithm for
                 Causal Structure Learning on {GPU}",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "31",
  number =       "3",
  pages =        "530--542",
  month =        mar,
  year =         "2020",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2939126",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed Jan 22 06:09:50 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "http://www.computer.org/portal/web/csdl/transactions/tpds",
  keywords =     "Bayes methods; Bayesian networks; causal discovery;
                 CUDA; GPU; Graphical models; Graphics processing units;
                 machine learning; Markov processes; Parallel
                 algorithms; parallel processing; PC algorithm;
                 Scalability",
}
@Article{Zhang:2020:CTE,
  author =       "T. Zhang and X. Liu and X. Wang and A. Walid",
  title =        "{cuTensor-Tubal}: Efficient Primitives for Tubal-Rank
                 Tensor Learning Operations on {GPUs}",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "31",
  number =       "3",
  pages =        "595--610",
  month =        mar,
  year =         "2020",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2019.2940192",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Wed Jan 22 06:09:50 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
  keywords =     "Computational modeling; Computer architecture;
                 cuTensor-tubal library; Frequency-domain analysis; GPU;
                 Graphics processing units; Libraries; Low-tubal-rank
                 tensor decomposition; Matrix decomposition; t-SVD;
                 tensor completion",
}
@Article{Zhou:2020:CHM,
  author =       "Huan Zhou and Jos{\'e} Gracia and Naweiluo Zhou and
                 Ralf Schneider",
  title =        "Collectives in hybrid {MPI+MPI} code: Design, practice
                 and performance",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "99",
  number =       "??",
  pages =        "Article 102669",
  month =        nov,
  year =         "2020",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2020.102669",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Mon Mar 29 11:36:02 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819120300612",
  acknowledgement = ack-nhfb,
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Zhou:2020:EOP,
  author =       "Hongyang Zhou and G{\'a}bor T{\'o}th",
  title =        "Efficient {OpenMP} parallelization to a complex {MPI}
                 parallel magnetohydrodynamics code",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "139",
  number =       "??",
  pages =        "65--74",
  month =        may,
  year =         "2020",
  CODEN =        "JPDCER",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Wed Mar 18 09:26:12 MDT 2020",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731519304903",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Aldinucci:2021:PPS,
  author =       "Marco Aldinucci and Valentina Cesare and Iacopo
                 Colonnelli and Alberto Riccardo Martinelli and Gianluca
                 Mittone and Barbara Cantalupo and Carlo Cavazzoni and
                 Maurizio Drocco",
  title =        "Practical parallelization of scientific applications
                 with {OpenMP}, {OpenACC} and {MPI}",
  journal =      j-J-PAR-DIST-COMP,
  volume =       "157",
  number =       "??",
  pages =        "13--29",
  month =        nov,
  year =         "2021",
  CODEN =        "JPDCER",
  DOI =          "https://doi.org/10.1016/j.jpdc.2021.05.017",
  ISSN =         "0743-7315 (print), 1096-0848 (electronic)",
  ISSN-L =       "0743-7315",
  bibdate =      "Thu Feb 10 06:39:21 MST 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0743731521001295",
  acknowledgement = ack-nhfb,
  fjournal =     "Journal of Parallel and Distributed Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Andoh:2021:AMM,
  author =       "Yoshimichi Andoh and Shin-ichi Ichikawa and Tatsuya
                 Sakashita and Noriyuki Yoshii and Susumu Okazaki",
  title =        "Algorithm to minimize {MPI} communications in the
                 parallelized fast multipole method combined with
                 molecular dynamics calculations",
  journal =      j-J-COMPUT-CHEM,
  volume =       "42",
  number =       "15",
  pages =        "1073--1087",
  day =          "5",
  month =        jun,
  year =         "2021",
  CODEN =        "JCCHDD",
  DOI =          "https://doi.org/10.1002/jcc.26524",
  ISSN =         "0192-8651 (print), 1096-987X (electronic)",
  ISSN-L =       "0192-8651",
  bibdate =      "Mon May 17 16:26:14 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib;
                 http://www.math.utah.edu/pub/tex/bib/jcomputchem2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "J. Comput. Chem.",
  fjournal =     "Journal of Computational Chemistry",
  journal-URL =  "http://www.interscience.wiley.com/jpages/0192-8651",
  onlinedate =   "29 March 2021",
}
@Article{Antonelli:2021:CBI,
  author =       "L. Antonelli and E. Francomano and F. Gregoretti",
  title =        "A {CUDA}-based implementation of an improved {SPH}
                 method on {GPU}",
  journal =      j-APPL-MATH-COMP,
  volume =       "409",
  number =       "??",
  pages =        "Article 125482",
  day =          "15",
  month =        nov,
  year =         "2021",
  CODEN =        "AMHCBQ",
  DOI =          "https://doi.org/10.1016/j.amc.2020.125482",
  ISSN =         "0096-3003 (print), 1873-5649 (electronic)",
  ISSN-L =       "0096-3003",
  bibdate =      "Mon Jan 31 07:58:57 MST 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/applmathcomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0096300320304410",
  acknowledgement = ack-nhfb,
  fjournal =     "Applied Mathematics and Computation",
  journal-URL =  "http://www.sciencedirect.com/science/journal/00963003",
}
@Article{Betcke:2021:DHP,
  author =       "Timo Betcke and Matthew W. Scroggs",
  title =        "Designing a High-Performance Boundary Element Library
                 With {OpenCL} and {Numba}",
  journal =      j-COMPUT-SCI-ENG,
  volume =       "23",
  number =       "4",
  pages =        "18--28",
  month =        jul # "\slash " # aug,
  year =         "2021",
  CODEN =        "CSENFA",
  DOI =          "https://doi.org/10.1109/MCSE.2021.3085420",
  ISSN =         "1521-9615 (print), 1558-366X (electronic)",
  ISSN-L =       "1521-9615",
  bibdate =      "Thu Jul 29 07:00:57 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing in Science and Engineering",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}
@Article{Cesarini:2021:CRT,
  author =       "D. Cesarini and A. Bartolini and P. Bonf{\`a} and C.
                 Cavazzoni and L. Benini",
  title =        "{COUNTDOWN}: a Run-Time Library for
                 Performance-Neutral Energy Saving in {MPI}
                 Applications",
  journal =      j-IEEE-TRANS-COMPUT,
  volume =       "70",
  number =       "5",
  pages =        "682--695",
  month =        may,
  year =         "2021",
  CODEN =        "ITCOB4",
  DOI =          "https://doi.org/10.1109/TC.2020.2995269",
  ISSN =         "0018-9340 (print), 1557-9956 (electronic)",
  ISSN-L =       "0018-9340",
  bibdate =      "Thu Apr 8 06:29:24 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Computers",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Chapp:2021:IDS,
  author =       "Dylan Chapp and Nigel Tan and Sanjukta Bhowmick and
                 Michela Taufer",
  title =        "Identifying Degree and Sources of Non-Determinism in
                 {MPI} Applications Via Graph Kernels",
  journal =      j-IEEE-TRANS-PAR-DIST-SYS,
  volume =       "32",
  number =       "12",
  pages =        "2936--2952",
  month =        dec,
  year =         "2021",
  CODEN =        "ITDSEO",
  DOI =          "https://doi.org/10.1109/TPDS.2021.3081530",
  ISSN =         "1045-9219 (print), 1558-2183 (electronic)",
  ISSN-L =       "1045-9219",
  bibdate =      "Fri Jun 4 09:55:50 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "IEEE Transactions on Parallel and Distributed
                 Systems",
  journal-URL =  "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
}
@Article{Chen:2021:CCR,
  author =       "Genlang Chen and Jiajian Zhang and Chaoyi Pang",
  title =        "{CRState}: checkpoint/restart of {OpenCL} program for
                 in-kernel applications",
  journal =      j-J-SUPERCOMPUTING,
  volume =       "77",
  number =       "6",
  pages =        "5426--5467",
  month =        jun,
  year =         "2021",
  CODEN =        "JOSUED",
  DOI =          "https://doi.org/10.1007/s11227-020-03460-2",
  ISSN =         "0920-8542 (print), 1573-0484 (electronic)",
  ISSN-L =       "0920-8542",
  bibdate =      "Fri May 14 09:20:01 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "https://link.springer.com/article/10.1007/s11227-020-03460-2",
  acknowledgement = ack-nhfb,
  fjournal =     "The Journal of Supercomputing",
  journal-URL =  "http://link.springer.com/journal/11227",
  online-date =  "Published: 06 November 2020 Pages: 5426 - 5467",
}
@Article{Dalcin:2021:MSU,
  author =       "Lisandro Dalcin and Yao-Lung L. Fang",
  title =        "{mpi4py}: Status Update After 12 Years of
                 Development",
  journal =      j-COMPUT-SCI-ENG,
  volume =       "23",
  number =       "4",
  pages =        "47--54",
  month =        jul # "\slash " # aug,
  year =         "2021",
  CODEN =        "CSENFA",
  DOI =          "https://doi.org/10.1109/MCSE.2021.3083216",
  ISSN =         "1521-9615 (print), 1558-366X (electronic)",
  ISSN-L =       "1521-9615",
  bibdate =      "Thu Jul 29 07:00:57 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/python.bib",
  acknowledgement = ack-nhfb,
  fjournal =     "Computing in Science and Engineering",
  journal-URL =  "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992",
}
@Article{Dosanjh:2021:IEM,
  author =       "Matthew G. F. Dosanjh and Andrew Worley and Derek
                 Schafer and Prema Soundararajan and Sheikh Ghafoor and
                 Anthony Skjellum and Purushotham V. Bangalore and Ryan
                 E. Grant",
  title =        "Implementation and evaluation of {MPI 4.0} partitioned
                 communication libraries",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "108",
  number =       "??",
  pages =        "??--??",
  month =        dec,
  year =         "2021",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2021.102827",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Fri Feb 18 10:07:17 MST 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819121000752",
  acknowledgement = ack-nhfb,
  articleno =    "102827",
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Ferreira:2021:EMR,
  author =       "Kurt B. Ferreira and Scott Levy",
  title =        "Evaluating {MPI} resource usage summary statistics",
  journal =      j-PARALLEL-COMPUTING,
  volume =       "108",
  number =       "??",
  pages =        "??--??",
  month =        dec,
  year =         "2021",
  CODEN =        "PACOEJ",
  DOI =          "https://doi.org/10.1016/j.parco.2021.102825",
  ISSN =         "0167-8191 (print), 1872-7336 (electronic)",
  ISSN-L =       "0167-8191",
  bibdate =      "Fri Feb 18 10:07:17 MST 2022",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sciencedirect.com/science/article/pii/S0167819121000739",
  acknowledgement = ack-nhfb,
  articleno =    "102825",
  fjournal =     "Parallel Computing",
  journal-URL =  "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Gong:2021:TDG,
  author =       "Dunwei Gong and Baicai Sun and Xiangjuan Yao and Tian
                 Tian",
  title =        "Test Data Generation for Path Coverage of {MPI}
                 Programs Using {SAEO}",
  journal =      j-TOSEM,
  volume =       "30",
  number =       "2",
  pages =        "17:1--17:37",
  month =        mar,
  year =         "2021",
  CODEN =        "ATSMER",
  DOI =          "https://doi.org/10.1145/3423132",
  ISSN =         "1049-331X (print), 1557-7392 (electronic)",
  ISSN-L =       "1049-331X",
  bibdate =      "Thu Mar 18 06:18:01 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/tosem.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3423132",
  abstract =     "Message-passing interface (MPI) programs, a typical
                 kind of parallel programs, have been commonly used in
                 various applications. However, it generally takes
                 exhaustive computation to run these programs when
                 generating test data to test them. In this \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "17",
  fjournal =     "ACM Transactions on Software Engineering and
                 Methodology",
  journal-URL =  "https://dl.acm.org/loi/tosem",
}
@Article{Hahne:2021:APP,
  author =       "Jens Hahne and Stephanie Friedhoff and Matthias
                 Bolten",
  title =        "{Algorithm 1016}: {PyMGRIT}: a {Python} Package for
                 the Parallel-in-time Method {MGRIT}",
  journal =      j-TOMS,
  volume =       "47",
  number =       "2",
  pages =        "19:1--19:22",
  month =        apr,
  year =         "2021",
  CODEN =        "ACMSCU",
  DOI =          "https://doi.org/10.1145/3446979",
  ISSN =         "0098-3500 (print), 1557-7295 (electronic)",
  ISSN-L =       "0098-3500",
  bibdate =      "Tue Apr 27 08:23:28 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                 http://www.math.utah.edu/pub/tex/bib/python.bib;
                 http://www.math.utah.edu/pub/tex/bib/toms.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3446979",
  abstract =     "In this article, we introduce the Python framework
                 PyMGRIT, which implements the
                 multigrid-reduction-in-time (MGRIT) algorithm for
                 solving (non-)linear systems arising from the
                 discretization of time-dependent problems. The MGRIT
                 algorithm is a reduction-based iterative method that
                 allows parallel-in-time simulations, i.e., calculating
                 multiple time steps simultaneously in a simulation,
                 using a time-grid hierarchy. The PyMGRIT framework
                 includes many different variants of the MGRIT
                 algorithm, ranging from different multigrid cycle types
                 and relaxation schemes, various coarsening strategies,
                 including time-only and space-time coarsening, and the
                 ability to utilize different time integrators on
                 different levels in the multigrid hierarchy. The
                 comprehensive documentation with tutorials and many
                 examples and the fully documented code allow an easy
                 start into the work with the package. The functionality
                 of the code is ensured by automated serial and parallel
                 tests using continuous integration. PyMGRIT supports
                 serial runs suitable for prototyping and testing of new
                 approaches, as well as parallel runs using the Message
                 Passing Interface (MPI). In this manuscript, we
                 describe the implementation of the MGRIT algorithm in
                 PyMGRIT and present the usage from both a user and a
                 developer point of view. Three examples illustrate
                 different aspects of the package itself, especially
                 running tests with pure time parallelism, as well as
                 space-time parallelism through the coupling of PyMGRIT
                 with PETSc or Firedrake.",
  acknowledgement = ack-nhfb,
  articleno =    "19",
  fjournal =     "ACM Transactions on Mathematical Software (TOMS)",
  journal-URL =  "https://dl.acm.org/loi/toms",
}
@Article{Halbiniak:2021:EOH,
  author =       "Kamil Halbiniak and Lukasz Szustak and Tomasz Olas and
                 Roman Wyrzykowski and Pawel Gepner",
  title =        "Exploration of {OpenCL} Heterogeneous Programming for
                 Porting Solidification Modeling to {CPU-GPU}
                 Platforms",
  journal =      j-CCPE,
  volume =       "33",
  number =       "4",
  pages =        "e6011:1--e6011:??",
  day =          "25",
  month =        feb,
  year =         "2021",
  CODEN =        "CCPEBO",
  DOI =          "https://doi.org/10.1002/cpe.6011",
  ISSN =         "1532-0626 (print), 1532-0634 (electronic)",
  ISSN-L =       "1532-0626",
  bibdate =      "Tue May 18 08:31:21 MDT 2021",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  ajournal =     "Concurr. Comput.",
  fjournal =     "Concurrency and Computation: Practice and Experience",
  journal-URL =  "http://www.interscience.wiley.com/jpages/1532-0626",
  onlinedate =   "09 October 2020",
}
@Article{Ho:2021:GFD,
  author =       {Nhut-Minh Ho and Himeshi {De Silva} and Weng-Fai
                  Wong},
  title =        {{GRAM}: a Framework for Dynamically Mixing Precisions
                  in {GPU} Applications},
  journal =      j-TACO,
  volume =       {18},
  number =       {2},
  pages =        {19:1--19:24},
  month =        mar,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3441830},
  ISSN =         {1544-3566 (print), 1544-3973 (electronic)},
  ISSN-L =       {1544-3566},
  bibdate =      {Sat Mar 20 17:25:10 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/fparith.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                  http://www.math.utah.edu/pub/tex/bib/taco.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3441830},
  abstract =     {This article presents GRAM (GPU-based Runtime Adaption
                  for Mixed-precision) a framework for the effective use
                  of mixed precision arithmetic for CUDA programs. Our
                  method provides a fine-grain tradeoff between output
                  error and performance. It can create many variants that
                  satisfy different accuracy requirements by assigning
                  different groups of threads to different precision
                  levels adaptively at runtime. To widen the range of
                  applications that can benefit from its approximation,
                  GRAM comes with an optional half-precision approximate
                  math library. Using GRAM, we can trade off precision
                  for any performance improvement of up to 540\%,
                  depending on the application and accuracy
                  requirement.},
  acknowledgement = ack-nhfb,
  articleno =    {19},
  fjournal =     {ACM Transactions on Architecture and Code Optimization
                  (TACO)},
  journal-URL =  {https://dl.acm.org/loi/taco},
}
@Article{Hori:2021:ISM,
  author =       {Atsushi Hori and Emmanuel Jeannot and George Bosilca
                  and Takahiro Ogura and Balazs Gerofi and Jie Yin and
                  Yutaka Ishikawa},
  title =        {An international survey on {MPI} users},
  journal =      j-PARALLEL-COMPUTING,
  volume =       {108},
  number =       {??},
  pages =        {??--??},
  month =        dec,
  year =         {2021},
  CODEN =        {PACOEJ},
  DOI =          {https://doi.org/10.1016/j.parco.2021.102853},
  ISSN =         {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L =       {0167-8191},
  bibdate =      {Fri Feb 18 10:07:17 MST 2022},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {http://www.sciencedirect.com/science/article/pii/S0167819121000983},
  acknowledgement = ack-nhfb,
  articleno =    {102853},
  fjournal =     {Parallel Computing},
  journal-URL =  {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Huang:2021:LBI,
  author =       {Liang-Tsung Huang and Kai-Cheng Wei and Jian-An Wang},
  title =        {A lightweight {BLASTP} and its implementation on {CUDA
                  GPUs}},
  journal =      j-J-SUPERCOMPUTING,
  volume =       {77},
  number =       {1},
  pages =        {322--342},
  month =        jan,
  year =         {2021},
  CODEN =        {JOSUED},
  DOI =          {https://doi.org/10.1007/s11227-020-03267-1},
  ISSN =         {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L =       {0920-8542},
  bibdate =      {Fri May 14 09:19:58 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {https://link.springer.com/article/10.1007/s11227-020-03267-1},
  acknowledgement = ack-nhfb,
  fjournal =     {The Journal of Supercomputing},
  journal-URL =  {http://link.springer.com/journal/11227},
  online-date =  {Published: 07 April 2020 Pages: 322 - 342},
}
@Article{Jalowiecki:2021:BFS,
  author =       {Konrad Ja{\l}owiecki and Marek M. Rams and
                  Bart{\l}omiej Gardas},
  title =        {Brute-forcing spin-glass problems with {CUDA}},
  journal =      j-COMP-PHYS-COMM,
  volume =       {260},
  number =       {??},
  pages =        {Article 107728},
  month =        mar,
  year =         {2021},
  CODEN =        {CPHCBZ},
  DOI =          {https://doi.org/10.1016/j.cpc.2020.107728},
  ISSN =         {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L =       {0010-4655},
  bibdate =      {Sat Mar 13 08:21:41 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {http://www.sciencedirect.com/science/article/pii/S001046552030360X},
  acknowledgement = ack-nhfb,
  fjournal =     {Computer Physics Communications},
  journal-URL =  {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Keppens:2021:MAP,
  author =       {Rony Keppens and Jannis Teunissen and Chun Xia and
                  Oliver Porth},
  title =        {{MPI-AMRVAC}: a parallel, grid-adaptive {PDE}
                  toolkit},
  journal =      j-COMPUT-MATH-APPL,
  volume =       {81},
  number =       {??},
  pages =        {316--333},
  day =          {1},
  month =        jan,
  year =         {2021},
  CODEN =        {CMAPDK},
  DOI =          {https://doi.org/10.1016/j.camwa.2020.03.023},
  ISSN =         {0898-1221 (print), 1873-7668 (electronic)},
  ISSN-L =       {0898-1221},
  bibdate =      {Sat Mar 13 10:03:14 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/computmathappl2020.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {http://www.sciencedirect.com/science/article/pii/S0898122120301279},
  acknowledgement = ack-nhfb,
  fjournal =     {Computers and Mathematics with Applications},
  journal-URL =  {http://www.sciencedirect.com/science/journal/08981221},
}
@Article{Kim:2021:GRP,
  author =       {Mingyu Kim and Nakhoon Baek},
  title =        {A {3D} graphics rendering pipeline implementation
                  based on the {openCL} massively parallel processing},
  journal =      j-J-SUPERCOMPUTING,
  volume =       {77},
  number =       {7},
  pages =        {7351--7367},
  month =        jul,
  year =         {2021},
  CODEN =        {JOSUED},
  DOI =          {https://doi.org/10.1007/s11227-020-03581-8},
  ISSN =         {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L =       {0920-8542},
  bibdate =      {Mon Feb 28 16:44:32 MST 2022},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {https://link.springer.com/article/10.1007/s11227-020-03581-8},
  acknowledgement = ack-nhfb,
  ajournal =     {J. Supercomputing},
  fjournal =     {The Journal of Supercomputing},
  journal-URL =  {http://link.springer.com/journal/11227},
}
@Article{Klemm:2021:OAH,
  author =       {Michael Klemm and Eduardo Qui{\~n}ones and Tucker Taft
                  and Dirk Ziegenbein and Sara Royuela},
  title =        {The {OpenMP API} for High Integrity Systems: Moving
                  Responsibility from Users to Vendors},
  journal =      j-SIGADA-LETTERS,
  volume =       {40},
  number =       {2},
  pages =        {48--50},
  month =        apr,
  year =         {2021},
  CODEN =        {AALEE5},
  DOI =          {https://doi.org/10.1145/3463478.3463480},
  ISSN =         {1094-3641 (print), 1557-9476 (electronic)},
  ISSN-L =       {0736-721X},
  bibdate =      {Mon Jun 28 15:50:16 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                  http://www.math.utah.edu/pub/tex/bib/sigada.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3463478.3463480},
  abstract =     {OpenMP is traditionally focused on boosting
                  performance in HPC systems. However, other domains are
                  showing an increasing interest in the use of OpenMP by
                  virtue of key aspects introduced in recent versions of
                  the specification: the tasking model, the accelerator
                  model, and other features like the requires and the
                  assumes directives, which allow defining certain
                  contracts. One example is the safety-critical embedded
                  domain, where several efforts have been initiated
                  towards the adoption of OpenMP. However, the OpenMP
                  specification states that ``application developers are
                  responsible for correctly using the OpenMP API to
                  produce a conforming program'', being not acceptable in
                  high integrity systems, where aspects such as
                  reliability and resiliency have to be ensured at
                  different levels of criticality. In this scope,
                  programming languages like Ada propose a different
                  paradigm by exposing fewer features to the user, and
                  leaving the responsibility of safely exploiting the
                  full underlying architecture to the compiler and the
                  runtime systems, instead. The philosophy behind this
                  kind of model is to move the responsibility of
                  producing correct parallel programs from users to
                  vendors. In this panel, actors from different domains
                  involved in the use of parallel programming models for
                  the development of high-integrity systems share their
                  thoughts about this topic.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGADA Ada Letters},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J32},
}
@Article{Kohnke:2021:CFM,
  author =       {Bartosz Kohnke and Carsten Kutzner and Andreas
                  Beckmann and Gert Lube and Ivo Kabadshow and Holger
                  Dachsel and Helmut Grubm{\"u}ller},
  title =        {A {CUDA} fast multipole method with highly efficient
                  {M2L} far field evaluation},
  journal =      j-IJHPCA,
  volume =       {35},
  number =       {1},
  pages =        {97--117},
  day =          {1},
  month =        jan,
  year =         {2021},
  CODEN =        {IHPCFL},
  DOI =          {https://doi.org/10.1177/1094342020964857},
  ISSN =         {1094-3420 (print), 1741-2846 (electronic)},
  ISSN-L =       {1094-3420},
  bibdate =      {Tue May 18 15:46:08 MDT 2021},
  bibsource =    {http://hpc.sagepub.com/;
                  http://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib;
                  http://www.math.utah.edu/pub/tex/bib/ijsa.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {https://journals.sagepub.com/doi/full/10.1177/1094342020964857},
  acknowledgement = ack-nhfb,
  ajournal =     {Int. J. High Perform. Comput. Appl.},
  fjournal =     {International Journal of High Performance Computing
                  Applications},
  journal-URL =  {https://journals.sagepub.com/home/hpc},
}
@Article{Lambert:2021:OOFa,
  author =       {Jacob Lambert and Seyong Lee and Jeffrey S. Vetter and
                  Allen D. Malony},
  title =        {Optimization with the {OpenACC-to-FPGA} framework on
                  the {Arria 10} and {Stratix 10} {FPGAs}},
  journal =      j-PARALLEL-COMPUTING,
  volume =       {104--105},
  number =       {??},
  pages =        {??--??},
  month =        jul,
  year =         {2021},
  CODEN =        {PACOEJ},
  DOI =          {https://doi.org/10.1016/j.parco.2021.102784},
  ISSN =         {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L =       {0167-8191},
  bibdate =      {Fri Feb 18 10:07:16 MST 2022},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {http://www.sciencedirect.com/science/article/pii/S0167819121000417},
  acknowledgement = ack-nhfb,
  articleno =    {102784},
  fjournal =     {Parallel Computing},
  journal-URL =  {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Lambert:2021:OOFb,
  author =       {Jacob Lambert and Seyong Lee and Jeffrey S. Vetter and
                  Allen D. Malony},
  title =        {Optimization with the {OpenACC}-to-{FPGA} framework on
                  the {Arria 10} and {Stratix 10} {FPGAs}},
  journal =      j-PARALLEL-COMPUTING,
  volume =       {104--105},
  number =       {??},
  pages =        {??--??},
  month =        jul,
  year =         {2021},
  CODEN =        {PACOEJ},
  DOI =          {https://doi.org/10.1016/j.parco.2021.102784},
  ISSN =         {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L =       {0167-8191},
  bibdate =      {Fri Feb 18 10:07:16 MST 2022},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {http://www.sciencedirect.com/science/article/pii/S0167819121000417},
  acknowledgement = ack-nhfb,
  articleno =    {102784},
  fjournal =     {Parallel Computing},
  journal-URL =  {http://www.sciencedirect.com/science/journal/01678191},
  remark =       {Duplicate of entry Lambert:2021:OOFa (same DOI, volume,
                  and article number). Retained so that existing
                  citations of this key continue to resolve.},
}
@Article{Liu:2021:BMN,
  author =       {Feilong Liu and Claude Barthels and Spyros Blanas and
                  Hideaki Kimura and Garret Swart},
  title =        {Beyond {MPI}: New Communication Interfaces for
                  Database Systems and Data-Intensive Applications},
  journal =      j-SIGMOD,
  volume =       {49},
  number =       {4},
  pages =        {12--17},
  month =        mar,
  year =         {2021},
  CODEN =        {SRECD8},
  DOI =          {https://doi.org/10.1145/3456859.3456862},
  ISSN =         {0163-5808 (print), 1943-5835 (electronic)},
  ISSN-L =       {0163-5808},
  bibdate =      {Thu Mar 11 06:12:21 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                  http://www.math.utah.edu/pub/tex/bib/sigmod.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3456859.3456862},
  abstract =     {Networks with Remote Direct Memory Access (RDMA)
                  support are becoming increasingly common. RDMA,
                  however, offers a limited programming interface to
                  remote memory that consists of read, write and atomic
                  operations. With RDMA alone, completing the most basic
                  \ldots{}},
  acknowledgement = ack-nhfb,
  fjournal =     {SIGMOD Record (ACM Special Interest Group on
                  Management of Data)},
  journal-URL =  {https://dl.acm.org/loi/sigmod},
}
@Article{Lyu:2021:FFA,
  author =       {Xing-long Lyu and Tiexiang Li and Tsung-ming Huang and
                  Jia-wei Lin and Wen-wei Lin and Sheng Wang},
  title =        {{FAME}: Fast Algorithms for {Maxwell}'s Equations for
                  Three-dimensional Photonic Crystals},
  journal =      j-TOMS,
  volume =       {47},
  number =       {3},
  pages =        {26:1--26:24},
  month =        jun,
  year =         {2021},
  CODEN =        {ACMSCU},
  DOI =          {https://doi.org/10.1145/3446329},
  ISSN =         {0098-3500 (print), 1557-7295 (electronic)},
  ISSN-L =       {0098-3500},
  bibdate =      {Sun Jun 27 07:42:02 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                  http://www.math.utah.edu/pub/tex/bib/toms.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3446329},
  abstract =     {In this article, we propose the Fast Algorithms for
                  Maxwell's Equations (FAME) package for solving
                  Maxwell's equations for modeling three-dimensional
                  photonic crystals. FAME combines the null-space free
                  method with fast Fourier transform (FFT)-based
                  matrix-vector multiplications to solve the generalized
                  eigenvalue problems (GEPs) arising from Yee's
                  discretization. The GEPs are transformed into a
                  null-space free standard eigenvalue problem with a
                  Hermitian positive-definite coefficient matrix. The
                  computation times for FFT-based matrix-vector
                  multiplications with matrices of dimension 7 million
                  are only $ 0.33 $ and $ 3.6 \times 10^{-3} $ seconds
                  using MATLAB with an Intel Xeon CPU and CUDA C++
                  programming with a single NVIDIA Tesla P100 GPU,
                  respectively. Such multiplications significantly reduce
                  the computational costs of the conjugate gradient
                  method for solving linear systems. We successfully use
                  FAME on a single P100 GPU to solve a set of GEPs with
                  matrices of dimension more than 19 million, in 127 to
                  191 seconds per problem. These results demonstrate the
                  potential of our proposed package to enable large-scale
                  numerical simulations for novel physical discoveries
                  and engineering applications of photonic crystals.},
  acknowledgement = ack-nhfb,
  articleno =    {26},
  fjournal =     {ACM Transactions on Mathematical Software (TOMS)},
  journal-URL =  {https://dl.acm.org/loi/toms},
}
@Article{Ma:2021:CSB,
  author =       {Wenpeng Ma and Wu Yuan and Xiazhen Liu},
  title =        {A Comparative Study of Block Incomplete Sparse
                  Approximate Inverses Preconditioning on {Tesla K20} and
                  {V100} {GPUs}},
  journal =      j-ALGORITHMS-BASEL,
  volume =       {14},
  number =       {7},
  month =        jul,
  year =         {2021},
  CODEN =        {ALGOCH},
  DOI =          {https://doi.org/10.3390/a14070204},
  ISSN =         {1999-4893 (electronic)},
  ISSN-L =       {1999-4893},
  bibdate =      {Fri Jul 23 15:05:28 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/algorithms.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {https://www.mdpi.com/1999-4893/14/7/204},
  acknowledgement = ack-nhfb,
  articleno =    {204},
  fjournal =     {Algorithms (Basel)},
  journal-URL =  {https://www.mdpi.com/journal/algorithms},
  pagecount =    {??},
}
@Article{Margolin:2021:TBF,
  author =       {Alexander Margolin and Amnon Barak},
  title =        {Tree-based fault-tolerant collective operations for
                  {MPI}},
  journal =      j-CCPE,
  volume =       {33},
  number =       {14},
  pages =        {e5826:1--e5826:??},
  day =          {25},
  month =        jul,
  year =         {2021},
  CODEN =        {CCPEBO},
  DOI =          {https://doi.org/10.1002/cpe.5826},
  ISSN =         {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L =       {1532-0626},
  bibdate =      {Tue Feb 22 09:49:55 MST 2022},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  ajournal =     {Concurrency Computat., Pract. Exper.},
  fjournal =     {Concurrency and Computation: Practice and Experience},
  journal-URL =  {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate =   {15 June 2020},
}
@Article{Martinez-Noriega:2021:COE,
  author =       {Edgar Josafat Martinez-Noriega and Syunji Yazaki and
                  Tetsu Narumi},
  title =        {{CUDA} offloading for energy-efficient and
                  high-frame-rate simulations using tablets},
  journal =      j-CCPE,
  volume =       {33},
  number =       {2},
  pages =        {e5488:1--e5488:??},
  day =          {25},
  month =        jan,
  year =         {2021},
  CODEN =        {CCPEBO},
  DOI =          {https://doi.org/10.1002/cpe.5488},
  ISSN =         {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L =       {1532-0626},
  bibdate =      {Tue May 18 08:31:19 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  ajournal =     {Concurrency Computat., Pract. Exper.},
  fjournal =     {Concurrency and Computation: Practice and Experience},
  journal-URL =  {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate =   {23 August 2019},
}
@Article{Matwiejew:2021:QFP,
  author =       {Edric Matwiejew and Jingbo Wang},
  title =        {{QSW\_MPI}: a framework for parallel simulation of
                  quantum stochastic walks},
  journal =      j-COMP-PHYS-COMM,
  volume =       {260},
  number =       {??},
  pages =        {Article 107724},
  month =        mar,
  year =         {2021},
  CODEN =        {CPHCBZ},
  DOI =          {https://doi.org/10.1016/j.cpc.2020.107724},
  ISSN =         {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L =       {0010-4655},
  bibdate =      {Sat Mar 13 08:21:41 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {http://www.sciencedirect.com/science/article/pii/S0010465520303581},
  acknowledgement = ack-nhfb,
  fjournal =     {Computer Physics Communications},
  journal-URL =  {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Meyer:2021:IBH,
  author =       {Bruno Henrique Meyer and Aurora Trinidad Ramirez Pozo
                  and Wagner M. Nunan Zola},
  title =        {Improving {Barnes--Hut} {$t$-SNE} Algorithm in Modern
                  {GPU} Architectures with Random Forest {KNN} and
                  Simulated Wide-Warp},
  journal =      j-JETC,
  volume =       {17},
  number =       {4},
  pages =        {53:1--53:26},
  month =        oct,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3447779},
  ISSN =         {1550-4832},
  ISSN-L =       {1550-4832},
  bibdate =      {Tue Sep 14 06:51:04 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/bibnet/subjects/fastmultipole.bib;
                  http://www.math.utah.edu/pub/tex/bib/jetc.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3447779},
  abstract =     {The $t$-Distributed Stochastic Neighbor Embedding
                  (t-SNE) is a widely used technique for dimensionality
                  reduction but is limited by its scalability when
                  applied to large datasets. Recently, BH-tSNE was
                  proposed; this is a successful approximation that
                  transforms a step of the original algorithm into an
                  N-Body simulation problem that can be solved by a
                  modified Barnes-Hut algorithm. However, this
                  improvement still has limitations to process large data
                  volumes (millions of records). Late studies, such as
                  $t$-SNE-CUDA, have used GPUs to implement highly
                  parallel BH-tSNE. In this research we have developed a
                  new GPU BH-tSNE implementation that produces the
                  embedding of multidimensional data points into
                  three-dimensional space. We examine scalability issues
                  in two of the most expensive steps of GPU BH-tSNE by
                  using efficient memory access strategies, recent
                  acceleration techniques, and a new approach to compute
                  the KNN graph structure used in BH-tSNE with GPU. Our
                  design allows up to 460\% faster execution when
                  compared to the $t$-SNE-CUDA implementation. Although
                  our SIMD acceleration techniques were used in a modern
                  GPU setup, we have also verified a potential for
                  applications in the context of multi-core processors.},
  acknowledgement = ack-nhfb,
  articleno =    {53},
  fjournal =     {ACM Journal on Emerging Technologies in Computing
                  Systems (JETC)},
  journal-URL =  {https://dl.acm.org/loi/jetc},
}
@Article{Muller:2021:MAE,
  author =       {Stefan K. Muller and Jan Hoffmann},
  title =        {Modeling and analyzing evaluation cost of {CUDA}
                  kernels},
  journal =      j-PACMPL,
  volume =       {5},
  number =       {POPL},
  pages =        {25:1--25:31},
  month =        jan,
  year =         {2021},
  DOI =          {https://doi.org/10.1145/3434306},
  bibdate =      {Tue Mar 30 08:10:58 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/pacmpl.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3434306},
  abstract =     {General-purpose programming on GPUs (GPGPU) is
                  becoming increasingly in vogue as applications such as
                  machine learning and scientific computing demand high
                  throughput in vector-parallel applications. NVIDIA's
                  CUDA toolkit seeks to make GPGPU programming \ldots{}},
  acknowledgement = ack-nhfb,
  articleno =    {25},
  fjournal =     {Proceedings of the ACM on Programming Languages},
  journal-URL =  {https://pacmpl.acm.org/},
}
@Article{Munch:2021:HDE,
  author =       {Peter Munch and Katharina Kormann and Martin
                  Kronbichler},
  title =        {{hyper.deal}: an Efficient, Matrix-free
                  Finite-element Library for High-dimensional Partial
                  Differential Equations},
  journal =      j-TOMS,
  volume =       {47},
  number =       {4},
  pages =        {33:1--33:34},
  month =        dec,
  year =         {2021},
  CODEN =        {ACMSCU},
  DOI =          {https://doi.org/10.1145/3469720},
  ISSN =         {0098-3500 (print), 1557-7295 (electronic)},
  ISSN-L =       {0098-3500},
  bibdate =      {Wed Sep 29 06:58:41 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                  http://www.math.utah.edu/pub/tex/bib/toms.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3469720},
  abstract =     {This work presents the efficient, matrix-free
                  finite-element library hyper.deal for solving partial
                  differential equations in two up to six dimensions with
                  high-order discontinuous Galerkin methods. It builds
                  upon the low-dimensional finite-element library deal.II
                  to create complex low-dimensional meshes and to operate
                  on them individually. These meshes are combined via a
                  tensor product on the fly, and the library provides new
                  special-purpose highly optimized matrix-free functions
                  exploiting domain decomposition as well as shared
                  memory via MPI-3.0 features. Both node-level
                  performance analyses and strong/weak-scaling studies on
                  up to 147,456 CPU cores confirm the efficiency of the
                  implementation. Results obtained with the library
                  hyper.deal are reported for high-dimensional advection
                  problems and for the solution of the Vlasov--Poisson
                  equation in up to six-dimensional phase space.},
  acknowledgement = ack-nhfb,
  articleno =    {33},
  fjournal =     {ACM Transactions on Mathematical Software (TOMS)},
  journal-URL =  {https://dl.acm.org/loi/toms},
}
@Article{Muruganandam:2021:OSR,
  author =       {Paulsamy Muruganandam and Antun Balaz and Sadhan K.
                  Adhikari},
  title =        {{OpenMP} solver for rotating spin-1 spin-orbit-
                  and {Rabi}-coupled {Bose--Einstein} condensates},
  journal =      j-COMP-PHYS-COMM,
  volume =       {264},
  number =       {??},
  pages =        {Article 107926},
  month =        jul,
  year =         {2021},
  CODEN =        {CPHCBZ},
  DOI =          {https://doi.org/10.1016/j.cpc.2021.107926},
  ISSN =         {0010-4655 (print), 1879-2944 (electronic)},
  ISSN-L =       {0010-4655},
  bibdate =      {Wed Jun 9 09:57:27 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {http://www.sciencedirect.com/science/article/pii/S0010465521000618},
  acknowledgement = ack-nhfb,
  fjournal =     {Computer Physics Communications},
  journal-URL =  {http://www.sciencedirect.com/science/journal/00104655},
}
@Article{Nguyen:2021:EMA,
  author =       {Truong Thao Nguyen and Mohamed Wahib and Ryousei
                  Takano},
  title =        {Efficient {MPI-AllReduce} for large-scale deep
                  learning on {GPU-clusters}},
  journal =      j-CCPE,
  volume =       {33},
  number =       {12},
  pages =        {e5574:1--e5574:??},
  day =          {25},
  month =        jun,
  year =         {2021},
  CODEN =        {CCPEBO},
  DOI =          {https://doi.org/10.1002/cpe.5574},
  ISSN =         {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L =       {1532-0626},
  bibdate =      {Tue Feb 22 09:49:53 MST 2022},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  ajournal =     {Concurrency Computat., Pract. Exper.},
  fjournal =     {Concurrency and Computation: Practice and Experience},
  journal-URL =  {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate =   {09 December 2019},
}
@Article{Perepu:2021:OIP,
  author =       {Pavan Kumar Perepu},
  title =        {{OpenMP} Implementation of Parallel Longest Common
                  Subsequence Algorithm for Mathematical Expression
                  Retrieval},
  journal =      j-PARALLEL-PROCESS-LETT,
  volume =       {31},
  number =       {02},
  pages =        {??--??},
  month =        jun,
  year =         {2021},
  CODEN =        {PPLTEE},
  DOI =          {https://doi.org/10.1142/S0129626421500079},
  ISSN =         {0129-6264 (print), 1793-642X (electronic)},
  ISSN-L =       {0129-6264},
  bibdate =      {Thu Feb 17 06:50:36 MST 2022},
  bibsource =    {http://ejournals.wspc.com.sg/ppl/;
                  http://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {https://www.worldscientific.com/doi/10.1142/S0129626421500079},
  abstract =     {Given a mathematical expression in LaTeX or MathML
                  format, retrieval algorithm extracts similar
                  expressions from a database. In our previous work, we
                  have used Longest Common Subsequence (LCS) algorithm to
                  match two expressions of lengths, $m$ and $n$, which
                  takes $ O(m n) $ time complexity. If there are $T$
                  database expressions, total complexity is $ O(T m n) $,
                  and an increase in $T$ also increases this complexity.
                  In the present work, we propose to use parallel LCS
                  algorithm in our retrieval process. Parallel LCS has
                  $ O(\max (m, n)) $ time complexity with $ \max (m, n) $
                  processors and total complexity can be reduced to
                  $ O(T \max (m, n)) $. For our experimentation,
                  OpenMP based implementation has been used on Intel i3
                  processor with 4 cores. However, for smaller
                  expressions, parallel version takes more time as the
                  implementation overhead dominates the algorithmic
                  improvement. As such, we have proposed to use parallel
                  version, selectively, only on larger expressions, in
                  our retrieval algorithm to achieve better performance.
                  We have compared the sequential and parallel versions
                  of our ME retrieval algorithm, and the performance
                  results have been reported on a database of 829
                  mathematical expressions.},
  acknowledgement = ack-nhfb,
  articleno =    {2150007},
  fjournal =     {Parallel Processing Letters},
  journal-URL =  {http://www.worldscientific.com/loi/ppl},
}
@Article{Pimentel-Garcia:2021:EIP,
  author =       {Ernesto Pimentel-Garc{\'\i}a and Carlos Par{\'e}s and
                  Manuel J. Castro and Julian Koellermeier},
  title =        {On the efficient implementation of {PVM} methods and
                  simple {Riemann} solvers. {Application} to the {Roe}
                  method for large hyperbolic systems},
  journal =      j-APPL-MATH-COMP,
  volume =       {388},
  number =       {??},
  pages =        {Article 125544},
  day =          {1},
  month =        jan,
  year =         {2021},
  CODEN =        {AMHCBQ},
  DOI =          {https://doi.org/10.1016/j.amc.2020.125544},
  ISSN =         {0096-3003 (print), 1873-5649 (electronic)},
  ISSN-L =       {0096-3003},
  bibdate =      {Sat Mar 13 06:39:48 MST 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/applmathcomput2020.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {http://www.sciencedirect.com/science/article/pii/S0096300320305002},
  acknowledgement = ack-nhfb,
  fjournal =     {Applied Mathematics and Computation},
  journal-URL =  {http://www.sciencedirect.com/science/journal/00963003},
}
@Article{Pinho:2021:RTI,
  author =       {Luis Miguel Pinho and Sara Royuela and Eduardo
                  Qui{\~n}ones},
  title =        {Real-time Issues in the {Ada} Parallel Model with
                  {OpenMP}},
  journal =      j-SIGADA-LETTERS,
  volume =       {40},
  number =       {2},
  pages =        {96--102},
  month =        apr,
  year =         {2021},
  CODEN =        {AALEE5},
  DOI =          {https://doi.org/10.1145/3463478.3463491},
  ISSN =         {1094-3641 (print), 1557-9476 (electronic)},
  ISSN-L =       {0736-721X},
  bibdate =      {Mon Jun 28 15:50:16 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                  http://www.math.utah.edu/pub/tex/bib/sigada.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3463478.3463491},
  abstract =     {The current proposal for the next revision of the Ada
                  language considers the possibility to map the language
                  parallel features to an underlying OpenMP runtime. As
                  previously presented, and discussed in previous
                  workshops, the works on fine-grain parallelism in Ada
                  map well to the OpenMP tasking model for parallelism.
                  Nevertheless, and although the general model of
                  integration, and the semantic constructs are already
                  reflected in the proposed revision of the standard, the
                  integration of these new features with the Real-Time
                  Systems Annex of Ada is still not complete. This paper
                  presents an overview of what is supported and the still
                  open issues.},
  acknowledgement = ack-nhfb,
  fjournal =     {ACM SIGADA Ada Letters},
  journal-URL =  {http://portal.acm.org/citation.cfm?id=J32},
}
@Article{Proficz:2021:AGA,
  author =       {Jerzy Proficz},
  title =        {All-gather Algorithms Resilient to Imbalanced Process
                  Arrival Patterns},
  journal =      j-TACO,
  volume =       {18},
  number =       {4},
  pages =        {41:1--41:22},
  month =        dec,
  year =         {2021},
  CODEN =        {????},
  DOI =          {https://doi.org/10.1145/3460122},
  ISSN =         {1544-3566 (print), 1544-3973 (electronic)},
  ISSN-L =       {1544-3566},
  bibdate =      {Mon Oct 4 07:14:07 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                  http://www.math.utah.edu/pub/tex/bib/taco.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3460122},
  abstract =     {Two novel algorithms for the all-gather operation
                  resilient to imbalanced process arrival patterns (PATs)
                  are presented. The first one, Background Disseminated
                  Ring (BDR), is based on the regular parallel ring
                  algorithm often supplied in MPI implementations and
                  exploits an auxiliary background thread for early data
                  exchange from faster processes to accelerate the
                  performed all-gather operation. The other algorithm,
                  Background Sorted Linear synchronized tree with
                  Broadcast (BSLB), is built upon the already existing
                  PAP-aware gather algorithm, that is, Background Sorted
                  Linear Synchronized tree (BSLS), followed by a regular
                  broadcast distributing gathered data to all
                  participating processes. The background of the
                  imbalanced PAP subject is described, along with the PAP
                  monitoring and evaluation topics. An experimental
                  evaluation of the algorithms based on a proposed
                  mini-benchmark is presented. The mini-benchmark was
                  performed over 2,000 times in a typical HPC cluster
                  architecture with homogeneous compute nodes. The
                  obtained results are analyzed according to different
                  PATs, data sizes, and process numbers, showing that the
                  proposed optimization works well for various
                  configurations, is scalable, and can significantly
                  reduce the all-gather elapsed times, in our case, up to
                  factor 1.9 or 47\% in comparison with the best
                  state-of-the-art solution.},
  acknowledgement = ack-nhfb,
  articleno =    {41},
  fjournal =     {ACM Transactions on Architecture and Code Optimization
                  (TACO)},
  journal-URL =  {https://dl.acm.org/loi/taco},
}
@Article{Quaranta:2021:NMH,
  author =       {Lionel Quaranta and Lalith Maddegedara},
  title =        {A novel {MPI+MPI} hybrid approach combining {MPI-3}
                  shared memory windows and {C11\slash C++11} memory
                  model},
  journal =      j-J-PAR-DIST-COMP,
  volume =       {157},
  number =       {??},
  pages =        {125--144},
  month =        nov,
  year =         {2021},
  CODEN =        {JPDCER},
  DOI =          {https://doi.org/10.1016/j.jpdc.2021.06.008},
  ISSN =         {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L =       {0743-7315},
  bibdate =      {Thu Feb 10 06:39:21 MST 2022},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
                  http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL =          {http://www.sciencedirect.com/science/article/pii/S074373152100143X},
  acknowledgement = ack-nhfb,
  fjournal =     {Journal of Parallel and Distributed Computing},
  journal-URL =  {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Ramachandran:2021:PPB,
  author =       {Prabhu Ramachandran and Aditya Bhosale and Kunal Puri
                  and Pawan Negi and Abhinav Muta and A. Dinesh and
                  Dileep Menon and Rahul Govind and Suraj Sanka and Amal
                  S. Sebastian and Ananyo Sen and Rohan Kaushik and
                  Anshuman Kumar and Vikas Kurapati and Mrinalgouda Patil
                  and Deep Tavker and Pankaj Pandey and Chandrashekhar
                  Kaushik and Arkopal Dutt and Arpit Agarwal},
  title =        {{PySPH}: a {Python}-based Framework for Smoothed
                  Particle Hydrodynamics},
  journal =      j-TOMS,
  volume =       {47},
  number =       {4},
  pages =        {34:1--34:38},
  month =        dec,
  year =         {2021},
  CODEN =        {ACMSCU},
  DOI =          {https://doi.org/10.1145/3460773},
  ISSN =         {0098-3500 (print), 1557-7295 (electronic)},
  ISSN-L =       {0098-3500},
  bibdate =      {Wed Sep 29 06:58:41 MDT 2021},
  bibsource =    {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                  http://www.math.utah.edu/pub/tex/bib/python.bib;
                  http://www.math.utah.edu/pub/tex/bib/toms.bib},
  URL =          {https://dl.acm.org/doi/10.1145/3460773},
  abstract =     {PySPH is an open-source, Python-based, framework for
                  particle methods in general and Smoothed Particle
                  Hydrodynamics (SPH) in particular. PySPH allows a user
                  to define a complete SPH simulation using pure Python.
                  High-performance code is generated from this high-level
                  Python code and executed on either multiple cores, or
                  on GPUs, seamlessly. It also supports distributed
                  execution using MPI. PySPH supports a wide variety of
                  SPH schemes and formulations. These include,
                  incompressible and compressible fluid flow, elastic
                  dynamics, rigid body dynamics, shallow water equations,
                  and other problems. PySPH supports a variety of
                  boundary conditions including mirror, periodic, solid
                  wall, and inlet/outlet boundary conditions. The package
                  is written to facilitate reuse and reproducibility.
                  This article discusses the overall design of PySPH and
                  demonstrates many of its features. Several example
                  results are shown to demonstrate the range of features
                  that PySPH provides.},
  acknowledgement = ack-nhfb,
  articleno =    {34},
  fjournal =     {ACM Transactions on Mathematical Software (TOMS)},
  journal-URL =  {https://dl.acm.org/loi/toms},
}
@Article{Ramroach:2021:ADP,
author = "Sterling Ramroach and Ajay Joshi",
title = "Accelerating Data-Parallel Neural Network Training
with Weighted-Averaging Reparameterisation",
journal = j-PARALLEL-PROCESS-LETT,
volume = "31",
number = "02",
pages = "??--??",
month = jun,
year = "2021",
DOI = "https://doi.org/10.1142/S0129626421500092",
ISSN = "0129-6264 (print), 1793-642X (electronic)",
ISSN-L = "0129-6264",
bibdate = "Thu Feb 17 06:50:36 MST 2022",
bibsource = "http://ejournals.wspc.com.sg/ppl/;
http://www.math.utah.edu/pub/tex/bib/parallelprocesslett.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.worldscientific.com/doi/10.1142/S0129626421500092",
abstract = "Recent advances in artificial intelligence has shown a
direct correlation between the performance of a network
and the number of hidden layers within the network. The
Compute Unified Device Architecture (CUDA) framework
facilitates the movement of heavy computation from the
CPU to the graphics processing unit (GPU) and is used
to accelerate the training of neural networks. In this
paper, we consider the problem of data-parallel neural
network training. We compare the performance of
training the same neural network on the GPU with and
without data parallelism. When data parallelism is
used, we compare with both the conventional averaging
of coefficients and our proposed method. We set out to
show that not all sub-networks are equal and thus,
should not be treated as equals when normalising weight
vectors. The proposed method achieved state of the art
accuracy faster than conventional training along with
better classification performance in some cases.",
acknowledgement = ack-nhfb,
articleno = "2150009",
fjournal = "Parallel Processing Letters",
journal-URL = "http://www.worldscientific.com/loi/ppl",
}
@Article{Reano:2021:RRC,
author = "Carlos Rea{\~n}o and Federico Silla",
title = "Redesigning the {rCUDA} communication layer for a
better adaptation to the underlying hardware",
journal = j-CCPE,
volume = "33",
number = "14",
pages = "e5481:1--e5481:??",
day = "25",
month = jul,
year = "2021",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.5481",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Tue Feb 22 09:49:55 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
ajournal = "Concurrency Computat., Pract. Exper.",
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "19 August 2019",
}
@Article{Rundo:2021:CPM,
author = "Leonardo Rundo and Andrea Tangherloni and Marco S.
Nobile",
title = "A {CUDA}-powered method for the feature extraction and
unsupervised analysis of medical images",
journal = j-J-SUPERCOMPUTING,
volume = "77",
number = "8",
pages = "8514--8531",
month = aug,
year = "2021",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-020-03565-8",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Mon Feb 28 16:44:32 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://link.springer.com/article/10.1007/s11227-020-03565-8",
acknowledgement = ack-nhfb,
ajournal = "J. Supercomputing",
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Schuchart:2021:CBC,
author = "Joseph Schuchart and Philipp Samfass and Christoph
Niethammer and Jos{\'e} Gracia and George Bosilca",
title = "Callback-based completion notification using {MPI}
Continuations",
journal = j-PARALLEL-COMPUTING,
volume = "106",
number = "??",
pages = "??--??",
month = sep,
year = "2021",
CODEN = "PACOEJ",
DOI = "https://doi.org/10.1016/j.parco.2021.102793",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Fri Feb 18 10:07:17 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819121000466",
acknowledgement = ack-nhfb,
articleno = "102793",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Schwarzrock:2021:RNI,
author = "J. Schwarzrock and C. C. {de Oliveira} and M. Ritt and
A. F. Lorenzon and A. C. S. Beck",
title = "A Runtime and Non-Intrusive Approach to Optimize {EDP}
by Tuning Threads and {CPU} Frequency for {OpenMP}
Applications",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "32",
number = "7",
pages = "1713--1724",
month = jul,
year = "2021",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2020.3046537",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Fri Mar 19 06:51:50 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
}
@Article{Sojoodi:2021:IGG,
author = "Amir Hossein Sojoodi and Majid Salimi Beni and Farshad
Khunjush",
title = "{Ignite-GPU}: a {GPU}-enabled in-memory computing
architecture on clusters",
journal = j-J-SUPERCOMPUTING,
volume = "77",
number = "3",
pages = "3165--3192",
month = mar,
year = "2021",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-020-03390-z",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Fri May 14 09:19:59 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://link.springer.com/article/10.1007/s11227-020-03390-z",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
online-date = "Published: 27 July 2020 Pages: 3165 - 3192",
}
@Article{Spiliotis:2021:PCD,
author = "Iraklis M. Spiliotis and Charalampos Sitaridis and
Michael P. Bekakos",
title = "Parallel Computation of Discrete Orthogonal Moment on
Block Represented Images Using {OpenMP}",
journal = j-INT-J-PARALLEL-PROG,
volume = "49",
number = "3",
pages = "440--462",
month = jun,
year = "2021",
CODEN = "IJPPE5",
DOI = "https://doi.org/10.1007/s10766-021-00713-2",
ISSN = "0885-7458 (print), 1573-7640 (electronic)",
ISSN-L = "0885-7458",
bibdate = "Fri May 14 08:58:34 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/intjparallelprogram.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://link.springer.com/article/10.1007/s10766-021-00713-2",
acknowledgement = ack-nhfb,
fjournal = "International Journal of Parallel Programming",
journal-URL = "http://link.springer.com/journal/10766",
online-date = "Published: 15 April 2021 Pages: 440 - 462",
}
@Article{Sun:2021:ACW,
author = "J. Sun and N. Guan and J. Sun and X. Zhang and Y. Chi
and F. Li",
title = "Algorithms for Computing the {WCRT} Bound of {OpenMP}
Task Systems With Conditional Branches",
journal = j-IEEE-TRANS-COMPUT,
volume = "70",
number = "1",
pages = "57--71",
month = jan,
year = "2021",
CODEN = "ITCOB4",
DOI = "https://doi.org/10.1109/TC.2020.2984502",
ISSN = "0018-9340 (print), 1557-9956 (electronic)",
ISSN-L = "0018-9340",
bibdate = "Thu Dec 17 19:35:03 2020",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranscomput2020.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Computers",
journal-URL = "http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=12",
}
@Article{Taft:2021:LMA,
author = "S. Tucker Taft",
title = "A Layered Mapping of {Ada 202X} to {OpenMP}",
journal = j-SIGADA-LETTERS,
volume = "40",
number = "2",
pages = "55--58",
month = apr,
year = "2021",
CODEN = "AALEE5",
DOI = "https://doi.org/10.1145/3463478.3463482",
ISSN = "1094-3641 (print), 1557-9476 (electronic)",
ISSN-L = "0736-721X",
bibdate = "Mon Jun 28 15:50:16 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/sigada.bib",
URL = "https://dl.acm.org/doi/10.1145/3463478.3463482",
abstract = "The OpenMP specification defines a set of compiler
directives, library routines, and environment variables
that together represent the OpenMP Application
Programming Interface, and is currently defined for C,
C++, and Fortran. The forthcoming version of Ada,
currently dubbed Ada 202X, includes lightweight
parallelism features, in particular parallel blocks and
parallel loops. All versions of Ada, since its
inception in 1983, have included ``tasking,'' which
corresponds to what are traditionally considered
``heavyweight'' parallelism features, or simply
``concurrency'' features. Ada ``tasks'' typically map
to what are called ``kernel threads,'' in that the
operating system manages them and schedules them.
However, one of the goals of lightweight parallelism is
to reduce overhead by doing more of the management
outside the kernel of the operating system, using a
light-weight-thread (LWT) scheduler. The OpenMP library
routines support both levels of threading, but for Ada
202X, the main interest is in making use of OpenMP for
its lightweight thread scheduling capabilities.",
acknowledgement = ack-nhfb,
fjournal = "ACM SIGADA Ada Letters",
journal-URL = "http://portal.acm.org/citation.cfm?id=J32",
}
@Article{Takizawa:2021:OLO,
author = "Hiroyuki Takizawa and Shinji Shiotsuki and Naoki Ebata
and Ryusuke Egawa",
title = "{OpenCL}-like offloading with metaprogramming for
{SX}-Aurora {TSUBASA}",
journal = j-PARALLEL-COMPUTING,
volume = "102",
number = "??",
pages = "Article 102754",
month = may,
year = "2021",
CODEN = "PACOEJ",
DOI = "https://doi.org/10.1016/j.parco.2021.102754",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Mon Mar 29 11:36:03 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819121000144",
acknowledgement = ack-nhfb,
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Tanaka:2021:NRP,
author = "Ushio Tanaka and Masami Saga and Junji Nakano",
title = "\pkg{NScluster}: An {R} Package for Maximum Palm
Likelihood Estimation for Cluster Point Process Models
Using {OpenMP}",
journal = j-J-STAT-SOFT,
volume = "98",
number = "6",
pages = "??--??",
month = "????",
year = "2021",
CODEN = "JSSOBK",
DOI = "https://doi.org/10.18637/jss.v98.i06",
ISSN = "1548-7660",
ISSN-L = "1548-7660",
bibdate = "Fri Jul 23 08:12:54 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jstatsoft.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.jstatsoft.org/index.php/jss/article/view/v098i06;
https://www.jstatsoft.org/index.php/jss/article/view/v098i06/v98i06.pdf",
acknowledgement = ack-nhfb,
journal-URL = "http://www.jstatsoft.org/",
}
@Article{Traff:2021:MCC,
author = "Jesper Larsson Tr{\"a}ff and Sascha Hunold and
Guillaume Mercier and Daniel J. Holmes",
title = "{MPI} collective communication through a single set of
interfaces: a case for orthogonality",
journal = j-PARALLEL-COMPUTING,
volume = "107",
number = "??",
pages = "??--??",
month = oct,
year = "2021",
CODEN = "PACOEJ",
DOI = "https://doi.org/10.1016/j.parco.2021.102826",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Fri Feb 18 10:07:17 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819121000740",
acknowledgement = ack-nhfb,
articleno = "102826",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Wang:2021:ATD,
author = "Farui Wang and Weizhe Zhang and Zheng Wang",
title = "Automatic translation of data parallel programs for
heterogeneous parallelism through {OpenMP} offloading",
journal = j-J-SUPERCOMPUTING,
volume = "77",
number = "5",
pages = "4957--4987",
month = may,
year = "2021",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-020-03452-2",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Fri May 14 09:20:00 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://link.springer.com/article/10.1007/s11227-020-03452-2",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
online-date = "Published: 29 October 2020 Pages: 4957 - 4987",
}
@Article{Wang:2021:PBD,
author = "Shao-Chung Wang and Lin-Ya Yu and Li-An Her and
Yuan-Shin Hwang and Jenq-Kuen Lee",
title = "Pointer-Based Divergence Analysis for {OpenCL 2.0}
Programs",
journal = j-TOPC,
volume = "8",
number = "4",
pages = "20:1--20:23",
month = dec,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3470644",
ISSN = "2329-4949 (print), 2329-4957 (electronic)",
ISSN-L = "2329-4949",
bibdate = "Fri Dec 10 10:52:35 MST 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/topc.bib",
URL = "https://dl.acm.org/doi/10.1145/3470644",
abstract = "A modern GPU is designed with many large thread groups
to achieve a high throughput and performance. Within
these groups, the threads are grouped into fixed-size
SIMD batches in which the same instruction is applied
to vectors of data in a lockstep. This \ldots{}",
acknowledgement = ack-nhfb,
articleno = "20",
fjournal = "ACM Transactions on Parallel Computing",
journal-URL = "https://dl.acm.org/loi/topc",
}
@Article{Wang:2021:PBS,
author = "Y. Wang and X. Jiang and N. Guan and Z. Guo and X. Liu
and W. Yi",
title = "Partitioning-Based Scheduling of {OpenMP} Task Systems
With Tied Tasks",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "32",
number = "6",
pages = "1322--1339",
month = jun,
year = "2021",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2020.3048373",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Fri Mar 19 06:51:50 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
}
@Article{Xue:2021:IFG,
author = "Weicheng Xue and Charles W. Jackson and Christopher J.
Roy",
title = "An improved framework of {GPU} computing for {CFD}
applications on structured grids using {OpenACC}",
journal = j-J-PAR-DIST-COMP,
volume = "156",
number = "??",
pages = "64--85",
month = oct,
year = "2021",
CODEN = "JPDCER",
DOI = "https://doi.org/10.1016/j.jpdc.2021.05.010",
ISSN = "0743-7315 (print), 1096-0848 (electronic)",
ISSN-L = "0743-7315",
bibdate = "Thu Feb 10 06:39:19 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0743731521001155",
acknowledgement = ack-nhfb,
fjournal = "Journal of Parallel and Distributed Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/07437315",
}
@Article{Xue:2021:MGP,
author = "Weicheng Xue and Christopher J. Roy",
title = "Multi-{GPU} performance optimization of a
computational fluid dynamics code using {OpenACC}",
journal = j-CCPE,
volume = "33",
number = "5",
pages = "e6036:1--e6036:??",
day = "10",
month = mar,
year = "2021",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.6036",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Tue May 18 08:31:21 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
ajournal = "Concurrency Computat., Pract. Exper.",
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "28 September 2020",
}
@Article{Yang:2021:HMC,
author = "Sheng-Chun Yang and Yong-Lei Wang",
title = "A hybrid {MPI-CUDA} approach for nonequispaced
discrete {Fourier} transformation",
journal = j-COMP-PHYS-COMM,
volume = "258",
number = "??",
pages = "Article 107513",
month = jan,
year = "2021",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/j.cpc.2020.107513",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Sat Mar 13 08:21:40 MST 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465520302393",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Yang:2021:SSG,
author = "Lishan Yang and Bin Nie and Adwait Jog and Evgenia
Smirni",
title = "{SUGAR}: Speeding Up {GPGPU} Application Resilience
Estimation with Input Sizing",
journal = j-POMACS,
volume = "5",
number = "1",
pages = "01:1--01:29",
month = feb,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.1145/3447375",
ISSN = "2476-1249",
ISSN-L = "2476-1249",
bibdate = "Mon Mar 29 10:31:36 MDT 2021",
bibsource = "http://portal.acm.org/;
http://www.math.utah.edu/pub/tex/bib/pomacs.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://dl.acm.org/doi/10.1145/3447375",
abstract = "As Graphics Processing Units (GPUs) are becoming a de
facto solution for accelerating a wide range of
applications, their reliable operation is becoming
increasingly important. One of the major challenges in
the domain of GPU reliability is to \ldots{}",
acknowledgement = ack-nhfb,
articleno = "01",
fjournal = "Proceedings of the ACM on Measurement and Analysis of
Computing Systems (POMACS)",
journal-URL = "https://dl.acm.org/loi/pomacs",
}
@Article{Zhang:2021:IRP,
author = "Jingrong Zhang and Zihao Wang and Zhiyong Liu and Fa
Zhang",
title = "Improve the Resolution and Parallel Performance of the
Three-Dimensional Refine Algorithm in {RELION} Using
{CUDA} and {MPI}",
journal = j-TCBB,
volume = "18",
number = "2",
pages = "583--595",
month = mar,
year = "2021",
CODEN = "ITCBCY",
DOI = "https://doi.org/10.1109/TCBB.2019.2929171",
ISSN = "1545-5963 (print), 1557-9964 (electronic)",
ISSN-L = "1545-5963",
bibdate = "Fri Mar 4 08:29:16 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/tcbb.bib",
URL = "https://dl.acm.org/doi/10.1109/TCBB.2019.2929171",
abstract = "In cryo-electron microscopy, RELION is a powerful tool
for high-resolution reconstruction. Due to the
complicated imaging procedure and the heterogeneity of
particles, some of the selected particle images offer
more disturbing information than others. \ldots{}",
acknowledgement = ack-nhfb,
fjournal = "IEEE/ACM Transactions on Computational Biology and
Bioinformatics",
journal-URL = "https://dl.acm.org/loi/tcbb",
}
@Article{Zhou:2021:HPG,
author = "Chao Zhou",
title = "High Performance Graph Data Imputation on Multiple
{GPUs}",
journal = j-FUTURE-INTERNET,
volume = "13",
number = "2",
pages = "36",
day = "31",
month = jan,
year = "2021",
CODEN = "????",
DOI = "https://doi.org/10.3390/fi13020036",
ISSN = "1999-5903",
bibdate = "Fri Feb 26 10:54:58 MST 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/future-internet.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://www.mdpi.com/1999-5903/13/2/36",
abstract = "In real applications, massive data with graph
structures are often incomplete due to various
restrictions. Therefore, graph data imputation
algorithms have been widely used in the fields of
social networks, sensor networks, and MRI to solve the
graph data completion problem. To keep the data
relevant, a data structure is represented by a
graph-tensor, in which each matrix is the vertex value
of a weighted graph. The convolutional imputation
algorithm has been proposed to solve the low-rank
graph-tensor completion problem that some data matrices
are entirely unobserved. However, this data imputation
algorithm has limited application scope because it is
compute-intensive and low-performance on CPU. In this
paper, we propose a scheme to perform the convolutional
imputation algorithm with higher time performance on
GPUs (Graphics Processing Units) by exploiting
multi-core GPUs of CUDA architecture. We propose
optimization strategies to achieve coalesced memory
access for graph Fourier transform (GFT) computation
and improve the utilization of GPU SM resources for
singular value decomposition (SVD) computation.
Furthermore, we design a scheme to extend the
GPU-optimized implementation to multiple GPUs for
large-scale computing. Experimental results show that
the GPU implementation is both fast and accurate. On
synthetic data of varying sizes, the GPU-optimized
implementation running on a single Quadro RTX6000 GPU
achieves up to 60.50$ \times $ speedups over the
GPU-baseline implementation. The multi-GPU
implementation achieves up to 1.81$ \times $ speedups
on two GPUs versus the GPU-optimized implementation on
a single GPU. On the ego-Facebook dataset, the
GPU-optimized implementation achieves up to 77.88$
\times $ speedups over the GPU-baseline implementation.
Meanwhile, the GPU implementation and the CPU
implementation achieve similar, low recovery errors.",
acknowledgement = ack-nhfb,
journal-URL = "https://www.mdpi.com/1999-5903/",
remark = "Section Smart System Infrastructure and
Applications.",
}
@Article{Zhu:2021:POT,
author = "Zijie Zhu and Yongxian Wang and Xinghua Cheng",
title = "Parallel optimization of three-dimensional
wedge-shaped underwater acoustic propagation based on
{MPI + OpenMP} hybrid programming model",
journal = j-J-SUPERCOMPUTING,
volume = "77",
number = "5",
pages = "4988--5018",
month = may,
year = "2021",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-020-03466-w",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Fri May 14 09:20:00 MDT 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://link.springer.com/article/10.1007/s11227-020-03466-w",
acknowledgement = ack-nhfb,
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
online-date = "Published: 29 October 2020 Pages: 4988 - 5018",
}
@Article{Agathos:2022:CAA,
author = "Spiros N. Agathos and Vassilios V. Dimakopoulos and
Ilias K. Kasmeridis",
title = "Compiler-assisted, adaptive runtime system for the
support of {OpenMP} in embedded multicores",
journal = j-PARALLEL-COMPUTING,
volume = "110",
number = "??",
pages = "??--??",
month = may,
year = "2022",
CODEN = "PACOEJ",
DOI = "https://doi.org/10.1016/j.parco.2022.102895",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Fri Feb 18 10:07:18 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819122000035",
acknowledgement = ack-nhfb,
articleno = "102895",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Bak:2022:OAE,
author = "Seonmyeong Bak and Colleen Bertoni and Swen Boehm and
Reuben Budiardja and Barbara M. Chapman and Johannes
Doerfert and Markus Eisenbach and Hal Finkel and Oscar
Hernandez and Joseph Huber and Shintaro Iwasaki and
Vivek Kale and Paul R. C. Kent and JaeHyuk Kwack and
Meifeng Lin and Piotr Luszczek and Ye Luo and Buu Pham
and Swaroop Pophale and Kiran Ravikumar and Vivek
Sarkar and Thomas Scogland and Shilei Tian and P. K.
Yeung",
title = "{OpenMP} application experiences: Porting to
accelerated nodes",
journal = j-PARALLEL-COMPUTING,
volume = "109",
number = "??",
pages = "??--??",
month = mar,
year = "2022",
CODEN = "PACOEJ",
DOI = "https://doi.org/10.1016/j.parco.2021.102856",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Fri Feb 18 10:07:18 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819121001009",
acknowledgement = ack-nhfb,
articleno = "102856",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Barai:2022:PMP,
author = "Atanu Barai and Yehia Arafa and Stephan Eidenbenz",
title = "\pkg{PPT-Multicore}: performance prediction of
{OpenMP} applications using reuse profiles and
analytical modeling",
journal = j-J-SUPERCOMPUTING,
volume = "78",
number = "2",
pages = "2354--2385",
month = feb,
year = "2022",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-021-03949-4",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Mon Feb 28 16:44:34 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://link.springer.com/article/10.1007/s11227-021-03949-4",
acknowledgement = ack-nhfb,
ajournal = "J. Supercomputing",
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Bouhrour:2022:TLC,
author = "Stephane Bouhrour and Thibaut Pepin and Julien
Jaeger",
title = "Towards leveraging collective performance with the
support of {MPI 4.0} features in {MPC}",
journal = j-PARALLEL-COMPUTING,
volume = "109",
number = "??",
pages = "??--??",
month = mar,
year = "2022",
CODEN = "PACOEJ",
DOI = "https://doi.org/10.1016/j.parco.2021.102860",
ISSN = "0167-8191 (print), 1872-7336 (electronic)",
ISSN-L = "0167-8191",
bibdate = "Fri Feb 18 10:07:18 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0167819121001034",
acknowledgement = ack-nhfb,
articleno = "102860",
fjournal = "Parallel Computing",
journal-URL = "http://www.sciencedirect.com/science/journal/01678191",
}
@Article{Delmas:2022:MGI,
author = "Vincent Delmas and Azzedine Soula{\"\i}mani",
title = "Multi-{GPU} implementation of a time-explicit finite
volume solver using {CUDA} and a {CUDA}-Aware version
of {OpenMPI} with application to shallow water flows",
journal = j-COMP-PHYS-COMM,
volume = "271",
number = "??",
pages = "Article 108190",
month = feb,
year = "2022",
CODEN = "CPHCBZ",
DOI = "https://doi.org/10.1016/j.cpc.2021.108190",
ISSN = "0010-4655 (print), 1879-2944 (electronic)",
ISSN-L = "0010-4655",
bibdate = "Mon Dec 20 16:41:52 MST 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/compphyscomm2020.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.sciencedirect.com/science/article/pii/S0010465521003027",
acknowledgement = ack-nhfb,
fjournal = "Computer Physics Communications",
journal-URL = "http://www.sciencedirect.com/science/journal/00104655",
}
@Article{Dichev:2022:PLR,
author = "Kiril Dichev and Daniele {De Sensi} and Dimitrios S.
Nikolopoulos and Kirk W. Cameron and Ivor Spence",
title = "{Power Log n Roll}: Power-Efficient Localized Rollback
for {MPI} Applications Using Message Logging
Protocols",
journal = j-IEEE-TRANS-PAR-DIST-SYS,
volume = "33",
number = "6",
pages = "1276--1288",
month = jun,
year = "2022",
CODEN = "ITDSEO",
DOI = "https://doi.org/10.1109/TPDS.2021.3107745",
ISSN = "1045-9219 (print), 1558-2183 (electronic)",
ISSN-L = "1045-9219",
bibdate = "Tue Nov 9 11:11:37 2021",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
fjournal = "IEEE Transactions on Parallel and Distributed
Systems",
journal-URL = "https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71",
}
@Article{Du:2022:MPO,
author = "Qi Du and Hui Huang",
title = "{MPI} parameter optimization during debugging phase of
{HPC} system",
journal = j-J-SUPERCOMPUTING,
volume = "78",
number = "2",
pages = "1696--1711",
month = feb,
year = "2022",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-021-03939-6",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Mon Feb 28 16:44:34 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://link.springer.com/article/10.1007/s11227-021-03939-6",
acknowledgement = ack-nhfb,
ajournal = "J. Supercomputing",
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Gonzalez-Dominguez:2022:MDP,
author = "Jorge Gonz{\'a}lez-Dom{\'\i}nguez and Jos{\'e} M.
Mart{\'\i}n-Mart{\'\i}nez and Roberto R. Exp{\'o}sito",
title = "\pkg{MPI-dot2dot}: A parallel tool to find {DNA}
tandem repeats on multicore clusters",
journal = j-J-SUPERCOMPUTING,
volume = "78",
number = "3",
pages = "4217--4235",
month = feb,
year = "2022",
CODEN = "JOSUED",
DOI = "https://doi.org/10.1007/s11227-021-04025-7",
ISSN = "0920-8542 (print), 1573-0484 (electronic)",
ISSN-L = "0920-8542",
bibdate = "Mon Feb 28 16:44:34 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "https://link.springer.com/article/10.1007/s11227-021-04025-7",
acknowledgement = ack-nhfb,
ajournal = "J. Supercomputing",
fjournal = "The Journal of Supercomputing",
journal-URL = "http://link.springer.com/journal/11227",
}
@Article{Haghi:2022:RSH,
author = "Pouya Haghi and Anqi Guo and Qingqing Xiong and Chen
Yang and Tong Geng and Justin T. Broaddus and Ryan
Marshall and Derek Schafer and Anthony Skjellum and
Martin C. Herbordt",
title = "Reconfigurable switches for high performance and
flexible {MPI} collectives",
journal = j-CCPE,
volume = "34",
number = "6",
pages = "e6769:1--e6769:??",
day = "10",
month = mar,
year = "2022",
CODEN = "CCPEBO",
DOI = "https://doi.org/10.1002/cpe.6769",
ISSN = "1532-0626 (print), 1532-0634 (electronic)",
ISSN-L = "1532-0626",
bibdate = "Tue Feb 22 09:50:09 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
ajournal = "Concurrency Computat., Pract. Exper.",
fjournal = "Concurrency and Computation: Practice and Experience",
journal-URL = "http://www.interscience.wiley.com/jpages/1532-0626",
onlinedate = "12 December 2021",
}
@Article{Huckelheim:2022:SSA,
author = "Jan H{\"u}ckelheim and Laurent Hasco{\"e}t",
title = "Source-to-Source Automatic Differentiation of {OpenMP}
Parallel Loops",
journal = j-TOMS,
volume = "48",
number = "1",
pages = "7:1--7:32",
month = mar,
year = "2022",
CODEN = "ACMSCU",
DOI = "https://doi.org/10.1145/3472796",
ISSN = "0098-3500 (print), 1557-7295 (electronic)",
ISSN-L = "0098-3500",
bibdate = "Thu Feb 17 08:00:57 MST 2022",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/toms.bib",
URL = "https://dl.acm.org/doi/10.1145/3472796",
abstract = "This article presents our work toward correct and
efficient automatic differentiation of OpenMP parallel worksharing loops
in forward and reverse mode. Automatic differentiation
is a method to obtain gradients of numerical programs,
which are crucial in optimization, uncertainty
quantification, and machine learning. The computational
cost to compute gradients is a common bottleneck in
practice. For applications that are parallelized for
multicore CPUs or GPUs using OpenMP, one also wishes to
compute the gradients in parallel. We propose a
framework to reason about the correctness of the
generated derivative code, from which we justify our
OpenMP extension to the differentiation model. We
implement this model in the automatic differentiation
tool Tapenade and present test cases that are
differentiated following our extended differentiation
procedure. Performance of the generated derivative
programs in forward and reverse mode is better than
sequential, although our reverse mode often scales
worse than the input programs.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Mathematical Software (TOMS)",
journal-URL = "https://dl.acm.org/loi/toms",
}
@Article{Jani:2022:HST,
  author = {Kunal Jani and Ankit Kumar and Ronak Nahata},
  title = {\pkg{Hpcfolder}: a simple tool used to parallelize
    algorithms using the message passing interface
    {(MPI)}},
  journal = j-J-SUPERCOMPUTING,
  volume = {78},
  number = {1},
  pages = {258--278},
  month = jan,
  year = {2022},
  CODEN = {JOSUED},
  DOI = {https://doi.org/10.1007/s11227-021-03896-0},
  ISSN = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L = {0920-8542},
  bibdate = {Mon Feb 28 16:44:33 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {https://link.springer.com/article/10.1007/s11227-021-03896-0},
  acknowledgement = ack-nhfb,
  ajournal = {J. Supercomputing},
  fjournal = {The Journal of Supercomputing},
  journal-URL = {http://link.springer.com/journal/11227},
}
@Article{Janssen:2022:GPU,
  author = {Dylan M. Janssen and Wayne Pullan and Alan Wee-Chung
    Liew},
  title = {Graphics processing unit acceleration of the island
    model genetic algorithm using the {CUDA} programming
    platform},
  journal = j-CCPE,
  volume = {34},
  number = {2},
  pages = {e6286:1--e6286:??},
  day = {25},
  month = jan,
  year = {2022},
  CODEN = {CCPEBO},
  DOI = {https://doi.org/10.1002/cpe.6286},
  ISSN = {1532-0626 (print), 1532-0634 (electronic)},
  ISSN-L = {1532-0626},
  bibdate = {Tue Feb 22 09:50:05 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ccpe.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  ajournal = {Concurrency Computat., Pract. Exper.},
  fjournal = {Concurrency and Computation: Practice and Experience},
  journal-URL = {http://www.interscience.wiley.com/jpages/1532-0626},
  onlinedate = {31 March 2021},
}
@Article{Li:2022:CDC,
  author = {Wentao Li and Zhiwen Chen and Xin He and Guoyun Duan
    and Jianhua Sun and Hao Chen},
  title = {{CVFuzz}: Detecting complexity vulnerabilities in
    {OpenCL} kernels via automated pathological input
    generation},
  journal = j-FUT-GEN-COMP-SYS,
  volume = {127},
  number = {??},
  pages = {384--395},
  month = feb,
  year = {2022},
  CODEN = {FGSEVI},
  DOI = {https://doi.org/10.1016/j.future.2021.09.006},
  ISSN = {0167-739X (print), 1872-7115 (electronic)},
  ISSN-L = {0167-739X},
  bibdate = {Wed Feb 9 09:07:25 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/futgencompsys.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0167739X21003526},
  acknowledgement = ack-nhfb,
  fjournal = {Future Generation Computer Systems},
  journal-URL = {http://www.sciencedirect.com/science/journal/0167739X},
}
@Article{Meyer:2022:DFA,
  author = {Marius Meyer and Tobias Kenter and Christian Plessl},
  title = {In-depth {FPGA} accelerator performance evaluation
    with single node benchmarks from the {HPC} challenge
    benchmark suite for {Intel} and {Xilinx} {FPGAs} using
    {OpenCL}},
  journal = j-J-PAR-DIST-COMP,
  volume = {160},
  number = {??},
  pages = {79--89},
  month = feb,
  year = {2022},
  CODEN = {JPDCER},
  DOI = {https://doi.org/10.1016/j.jpdc.2021.10.007},
  ISSN = {0743-7315 (print), 1096-0848 (electronic)},
  ISSN-L = {0743-7315},
  bibdate = {Thu Feb 10 06:39:24 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/jpardistcomp.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0743731521002057},
  acknowledgement = ack-nhfb,
  fjournal = {Journal of Parallel and Distributed Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/07437315},
}
@Article{Protze:2022:MDT,
  author = {Joachim Protze and Marc-Andr{\'e} Hermanns and
    Matthias S. M{\"u}ller and Van Man Nguyen and Julien
    Jaeger and Emmanuelle Saillard and Patrick Carribault
    and Denis Barthou},
  title = {{MPI} detach --- Towards automatic asynchronous local
    completion},
  journal = j-PARALLEL-COMPUTING,
  volume = {109},
  number = {??},
  pages = {??--??},
  month = mar,
  year = {2022},
  CODEN = {PACOEJ},
  DOI = {https://doi.org/10.1016/j.parco.2021.102859},
  ISSN = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L = {0167-8191},
  bibdate = {Fri Feb 18 10:07:18 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0167819121001022},
  acknowledgement = ack-nhfb,
  articleno = {102859},
  fjournal = {Parallel Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/01678191},
}
@Article{Raskovalov:2022:AMD,
  author = {Anton Raskovalov and Platon Surkov},
  title = {{azTotMD 2.0}: {Molecular} dynamics with the radiative
    thermostat and temperature-dependent force field
    ({CUDA} version)},
  journal = j-SOFTWAREX,
  volume = {17},
  number = {??},
  pages = {??--??},
  month = jan,
  year = {2022},
  CODEN = {????},
  DOI = {https://doi.org/10.1016/j.softx.2022.100995},
  ISSN = {2352-7110},
  ISSN-L = {2352-7110},
  bibdate = {Mon Feb 28 10:41:25 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/softwarex.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S2352711022000127},
  acknowledgement = ack-nhfb,
  articleno = {100995},
  fjournal = {SoftwareX},
  journal-URL = {https://www.sciencedirect.com/journal/softwarex/issues},
}
@Article{Rocco:2022:LFR,
  author = {Roberto Rocco and Davide Gadioli and Gianluca
    Palermo},
  title = {\pkg{Legio}: fault resiliency for embarrassingly
    parallel {MPI} applications},
  journal = j-J-SUPERCOMPUTING,
  volume = {78},
  number = {2},
  pages = {2175--2195},
  month = feb,
  year = {2022},
  CODEN = {JOSUED},
  DOI = {https://doi.org/10.1007/s11227-021-03951-w},
  ISSN = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L = {0920-8542},
  bibdate = {Mon Feb 28 16:44:34 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/jsuper.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {https://link.springer.com/article/10.1007/s11227-021-03951-w},
  acknowledgement = ack-nhfb,
  ajournal = {J. Supercomputing},
  fjournal = {The Journal of Supercomputing},
  journal-URL = {http://link.springer.com/journal/11227},
}
@Article{Smith:2022:PAM,
  author = {Matthew Smith and Arjen Tamerus and Phil Hasnip},
  title = {Portable Acceleration of Materials Modeling Software:
    {CASTEP}, {GPUs}, and {OpenACC}},
  journal = j-COMPUT-SCI-ENG,
  volume = {24},
  number = {1},
  pages = {46--55},
  month = jan # "\slash " # feb,
  year = {2022},
  CODEN = {CSENFA},
  DOI = {https://doi.org/10.1109/MCSE.2022.3141714},
  ISSN = {1521-9615 (print), 1558-366X (electronic)},
  ISSN-L = {1521-9615},
  bibdate = {Thu Mar 17 07:23:22 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/computscieng.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {Computing in Science and Engineering},
  journal-URL = {http://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=5992},
}
@Article{Zhao:2022:SGM,
  author = {Chen Zhao and Wu Gao and Feiping Nie and Huiyang
    Zhou},
  title = {A Survey of {GPU} Multitasking Methods Supported by
    Hardware Architecture},
  journal = j-IEEE-TRANS-PAR-DIST-SYS,
  volume = {33},
  number = {6},
  pages = {1451--1463},
  month = jun,
  year = {2022},
  CODEN = {ITDSEO},
  DOI = {https://doi.org/10.1109/TPDS.2021.3115630},
  ISSN = {1045-9219 (print), 1558-2183 (electronic)},
  ISSN-L = {1045-9219},
  bibdate = {Tue Nov 9 11:11:37 2021},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/ieeetranspardistsys.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  fjournal = {IEEE Transactions on Parallel and Distributed
    Systems},
  journal-URL = {https://ieeexplore.ieee.org/xpl/RecentIssue.jsp?punumber=71},
}
@Article{Zhong:2022:ULV,
  author = {Dong Zhong and Qinglei Cao and George Bosilca and Jack
    Dongarra},
  title = {Using long vector extensions for {MPI} reductions},
  journal = j-PARALLEL-COMPUTING,
  volume = {109},
  number = {??},
  pages = {??--??},
  month = mar,
  year = {2022},
  CODEN = {PACOEJ},
  DOI = {https://doi.org/10.1016/j.parco.2021.102871},
  ISSN = {0167-8191 (print), 1872-7336 (electronic)},
  ISSN-L = {0167-8191},
  bibdate = {Fri Feb 18 10:07:18 MST 2022},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/parallelcomputing.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.sciencedirect.com/science/article/pii/S0167819121001137},
  acknowledgement = ack-nhfb,
  articleno = {102871},
  fjournal = {Parallel Computing},
  journal-URL = {http://www.sciencedirect.com/science/journal/01678191},
}
@Proceedings{Anonymous:1989:PFC,
  editor = "Anonymous",
  booktitle = "{Proceedings of the Fourth Conference on Hypercubes,
    Concurrent Computers and Applications, 6--8 March 1989,
    Monterey, CA, USA}",
  title = "{Proceedings of the Fourth Conference on Hypercubes,
    Concurrent Computers and Applications, 6--8 March 1989,
    Monterey, CA, USA}",
  publisher = "Golden Gate Enterprises",
  address = "Los Altos, CA, USA",
  pages = "xiv + 1362",
  year = "1989",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "QA76.5.C619215 1989",
  bibdate = "Sun Dec 22 10:16:53 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "Two volumes",
  acknowledgement = ack-nhfb,
  confsponsor = "D.O.E.; US Air Force; NASA",
}
@Proceedings{ACM:1990:PAC,
  editor = {{ACM}},
  booktitle = {{Proceedings of the 1990 ACM Conference on LISP and
    Functional Programming: papers presented at the
    conference, Nice, France, June 27--29, 1990}},
  title = {{Proceedings of the 1990 ACM Conference on LISP and
    Functional Programming: papers presented at the
    conference, Nice, France, June 27--29, 1990}},
  publisher = pub-ACM,
  address = pub-ACM:adr,
  pages = {viii + 348},
  year = {1990},
  ISBN = {0-89791-368-X},
  ISBN-13 = {978-0-89791-368-3},
  LCCN = {QA 76.73 L23 A24 1990},
  bibdate = {Wed Apr 16 07:21:40 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {ACM order no. 552900.},
  acknowledgement = ack-nhfb,
  confsponsor = {ACM},
}
@Proceedings{Bhavsar:1991:SSJ,
  editor = {Virendrakumar Chhabulal Bhavsar and Uday Govinddas
    Gujar},
  booktitle = {{Supercomputing Symposium '91, June 3--5, 1991,
    Fredericton, NB, Canada: symposium proceedings}},
  title = {{Supercomputing Symposium '91, June 3--5, 1991,
    Fredericton, NB, Canada: symposium proceedings}},
  publisher = {University of New Brunswick Press},
  address = {Fredericton, NB, Canada},
  pages = {x + 544},
  year = {1991},
  ISBN = {0-920114-14-8},
  ISBN-13 = {978-0-920114-14-8},
  LCCN = {QA76.88.S87 1991},
  bibdate = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  corpsource = {Centre for Dev. of Adv. Comput., Bangalore, India},
  pubcountry = {Canada},
  treatment = {P Practical},
}
@Proceedings{Durand:1991:HPC,
  editor = {M. Durand and F. {El Dabaghi}},
  booktitle = {{High performance computing, II: proceedings of the
    Second Symposium on High Performance Computing,
    Montpellier, France, 7--9 October, 1991}},
  title = {{High performance computing, II: proceedings of the
    Second Symposium on High Performance Computing,
    Montpellier, France, 7--9 October, 1991}},
  publisher = pub-NH,
  address = pub-NH:adr,
  pages = {xii + 673},
  year = {1991},
  ISBN = {0-444-89224-9},
  ISBN-13 = {978-0-444-89224-9},
  LCCN = {QA75.5.I585 1991},
  bibdate = {Sun Dec 22 10:17:16 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  numericalindex = {Computer speed 2.0E+07 to 6.0E+07 FLOPS},
  pubcountry = {Netherlands},
}
@Proceedings{IEEE:1991:PSA,
  editor = {{IEEE}},
  key = {Supercomputing '91},
  booktitle = {{Proceedings, Supercomputing '91: Albuquerque, New
    Mexico, November 18--22, 1991}},
  title = {{Proceedings, Supercomputing '91: Albuquerque, New
    Mexico, November 18--22, 1991}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xxiii + 917},
  year = {1991},
  ISBN = {0-8186-9158-1 (IEEE: case), 0-8186-2158-3 (IEEE:
    paper), 0-8186-6158-5 (IEEE: microfiche), 0-89791-459-7
    (ACM)},
  ISBN-13 = {978-0-8186-9158-4 (IEEE: case), 978-0-8186-2158-1
    (IEEE: paper), 978-0-8186-6158-7 (IEEE: microfiche),
    978-0-89791-459-8 (ACM)},
  LCCN = {QA76.5 .S894 1991},
  bibdate = {Mon Jan 15 11:05:59 1996},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {IEEE catalog no. 91CH3058-5.},
}
@Proceedings{Stout:1991:SDM,
  editor = {Quentin F. Stout and Michael Joseph Wolfe},
  booktitle = {{The Sixth Distributed Memory Computing Conference
    proceedings April 28--May 1, 1991, Portland, Oregon}},
  title = {{The Sixth Distributed Memory Computing Conference
    proceedings April 28--May 1, 1991, Portland, Oregon}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xx + 736},
  year = {1991},
  ISBN = {0-8186-2291-1},
  ISBN-13 = {978-0-8186-2291-5},
  LCCN = {QA76.5 .D58 1991},
  bibdate = {Tue Jan 16 07:21:24 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Anonymous:1992:PSE,
  editor = {Anonymous},
  booktitle = {{Proceedings SHARE Europe Anniversary Meeting}},
  title = {{Proceedings SHARE Europe Anniversary Meeting}},
  publisher = {SHARE Eur. Assoc},
  address = {Geneva, Switzerland},
  pages = {752},
  year = {1992},
  bibdate = {Sun Dec 22 10:17:40 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  confdate = {28 Sept.--2 Oct. 1992},
  conflocation = {Davos, Switzerland},
  pubcountry = {Switzerland},
}
@Proceedings{Dongarra:1992:PFS,
  editor = "J. Dongarra and K. Kennedy and P. Messina and D. C.
    Sorensen and R. G. Voigt",
  booktitle = "{Proceedings of the Fifth SIAM Conference on Parallel
    Processing for Scientific Computing, 25--27 March 1991,
    Houston, TX, USA}",
  title = "{Proceedings of the Fifth SIAM Conference on Parallel
    Processing for Scientific Computing, 25--27 March 1991,
    Houston, TX, USA}",
  publisher = pub-SIAM,
  address = pub-SIAM:adr,
  pages = "xvii + 648",
  year = "1992",
  ISBN = "0-89871-303-X",
  ISBN-13 = "978-0-89871-303-9",
  LCCN = "QA76.58.P76 1992",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confsponsor = "SIAM",
}
@Proceedings{Evans:1992:PCP,
  editor = {D. J. Evans and G. R. Joubert and H. Liddell},
  booktitle = {{Parallel computing '91: proceedings of the
    International Conference on Parallel Computing '91,
    London, UK, 3--6 September 1991}},
  title = {{Parallel computing '91: proceedings of the
    International Conference on Parallel Computing '91,
    London, UK, 3--6 September 1991}},
  volume = {4},
  publisher = pub-NH,
  address = pub-NH:adr,
  pages = {xi + 628},
  year = {1992},
  ISBN = {0-444-89212-5},
  ISBN-13 = {978-0-444-89212-6},
  LCCN = {QA76.58.I545 1991},
  bibdate = {Sun Dec 22 10:17:16 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = {Advances in parallel computing},
  acknowledgement = ack-nhfb,
  confsponsor = {Elsevier Sci. Publishers; Maspar Comput. Corp.; NCUBE;
    Office Naval Res. Eur. Office; Transtech},
  numericalindex = {Byte rate 6.0E+06 Byte/s},
  pubcountry = {Netherlands},
}
@Proceedings{Ferenczi:1992:AHW,
  editor = "S. Ferenczi",
  booktitle = "{1st Austrian-Hungarian Workshop on Transputer
    Applications. Proceedings}",
  title = "{1st Austrian-Hungarian Workshop on Transputer
    Applications. Proceedings}",
  publisher = "Hungarian Acad. of Sci",
  address = "Budapest, Hungary",
  pages = "v + 117",
  year = "1992",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  confdate = "8--10 Oct. 1992",
  conflocation = "Sopron, Hungary",
  pubcountry = "Hungary",
}
@Proceedings{IEEE:1992:PSH,
  editor = {{IEEE}},
  booktitle = {{Proceedings / Scalable High Performance Computing
    Conference, SHPCC-92, April 26--29, 1992, Williamsburg,
    Virginia}},
  title = {{Proceedings / Scalable High Performance Computing
    Conference, SHPCC-92, April 26--29, 1992, Williamsburg,
    Virginia}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xiii + 448},
  year = {1992},
  ISBN = {0-8186-2775-1},
  ISBN-13 = {978-0-8186-2775-0},
  LCCN = {QA76.76.A65S33 1992},
  bibdate = {Sun Dec 22 10:17:40 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {IEEE catalog no. 92TH0432-5.},
  acknowledgement = ack-nhfb,
  confsponsor = {IEEE},
}
@Proceedings{Russell:1992:CMW,
  editor = {Thomas F. Russell and others},
  booktitle = {{Computational methods in water resources IX:
    Proceedings of the Ninth International Conference on
    Computational Methods in Water Resources, held at the
    University of Colorado, Denver, in June 1992}},
  title = {{Computational methods in water resources IX:
    Proceedings of the Ninth International Conference on
    Computational Methods in Water Resources, held at the
    University of Colorado, Denver, in June 1992}},
  publisher = pub-ELSAS,
  address = pub-ELSAS:adr,
  pages = {various},
  year = {1992},
  ISBN = {1-85166-871-3 (set), 1-85312-169-X (set: Computational
    Mechanics Publications, Southampton), 1-56252-098-9
    (set: Computational Mechanics Publications, Boston),
    1-85166-791-1 (v. 1: Elsevier Applied Science),
    1-85312-197-5 (v. 1: Computational Mechanics
    Publications, Southampton), 1-56252-123-3 (v. 1:
    Computational Mechanics Publications, New York),
    1-85166-870-5 (v. 2), 1-85312-198-3 (v. 2),
    1-56252-124-1 (v. 2)},
  ISBN-13 = {978-1-85166-871-7 (set), 978-1-85312-169-2 (set:
    Computational Mechanics Publications, Southampton),
    978-1-56252-098-4 (set: Computational Mechanics
    Publications, Boston), 978-1-85166-791-8 (v. 1:
    Elsevier Applied Science), 978-1-85312-197-5 (v. 1:
    Computational Mechanics Publications, Southampton),
    978-1-56252-123-3 (v. 1: Computational Mechanics
    Publications, New York), 978-1-85166-870-0 (v. 2),
    978-1-85312-198-2 (v. 2), 978-1-56252-124-0 (v. 2)},
  LCCN = {GB656.2.E42 C65 1992 v.1-2 (c1992)},
  bibdate = {Mon Jan 15 18:04:49 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {Two volumes.},
  acknowledgement = ack-nhfb,
}
@Proceedings{SCRI:1992:PWC,
  key = {SCRI WCC'92},
  booktitle = {{Proceedings of the Workshop on Cluster Computing}},
  title = {{Proceedings of the Workshop on Cluster Computing}},
  publisher = pub-SCRI,
  address = pub-SCRI:adr,
  pages = {??},
  month = dec,
  year = {1992},
  ISBN = {????},
  ISBN-13 = {????},
  LCCN = {????},
  bibdate = {Tue Jan 16 07:34:08 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {Proceedings available via anonymous ftp from
    \path=ftp.scri.fsu.edu= in directory
    \path=pub/parallel-workshop.92=.},
  acknowledgement = ack-nhfb,
}
%%% NOTE: Siegel:1992:FFS and Siegel:1992:FSF share the same ISBN,
%%% LCCN, and page count, so they appear to describe one volume.
%%% Presumably both keys are still referenced elsewhere in this
%%% file; verify before merging them.
@Proceedings{Siegel:1992:FFS,
  editor = "H. J. Siegel",
  booktitle = "{Frontiers '92, the Fourth Symposium on the Frontiers
    of Massively Parallel Computation, October 19--21, 1992,
    McLean, Virginia}",
  title = "{Frontiers '92, the Fourth Symposium on the Frontiers
    of Massively Parallel Computation, October 19--21, 1992,
    McLean, Virginia}",
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = "xviii + 592",
  year = "1992",
  ISBN = "0-8186-2772-7",
  ISBN-13 = "978-0-8186-2772-9",
  LCCN = "QA76.58.S95 1992",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "IEEE catalog no. 92CH3185-6.",
  acknowledgement = ack-nhfb,
  confsponsor = "IEEE; NASA",
}
%%% NOTE: Siegel:1992:FSF and Siegel:1992:FFS share the same ISBN,
%%% LCCN, and page count, so they appear to describe one volume.
%%% Presumably both keys are still referenced elsewhere in this
%%% file; verify before merging them.
@Proceedings{Siegel:1992:FSF,
  editor = {H. J. Siegel},
  booktitle = {{The Fourth Symposium on the Frontiers of Massively
    Parallel Computation: Frontiers '92 / October 19--21,
    1992, McLean Virginia}},
  title = {{The Fourth Symposium on the Frontiers of Massively
    Parallel Computation: Frontiers '92 / October 19--21,
    1992, McLean Virginia}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xviii + 592},
  year = {1992},
  ISBN = {0-8186-2772-7},
  ISBN-13 = {978-0-8186-2772-9},
  LCCN = {QA76.58.S95 1992},
  bibdate = {Wed Apr 16 07:25:17 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {IEEE catalog number 92CH3185-6.},
  acknowledgement = ack-nhfb,
}
@Proceedings{Verkerk:1992:PIC,
  editor = "C. Verkerk and W. Wojcik",
  booktitle = "{Proceedings of the International Conference on
    Computing in High Energy Physics '92, Annecy, France,
    21--25 September 1992}",
  title = "{Proceedings of the International Conference on
    Computing in High Energy Physics '92, Annecy, France,
    21--25 September 1992}",
  publisher = "CERN",
  address = "Geneva, Switzerland",
  pages = "xxiii + 916",
  year = "1992",
  ISBN = "92-9083-049-2",
  ISBN-13 = "978-92-9083-049-8",
  LCCN = "QC783.3 C65 1992",
  bibdate = "Sun Dec 22 10:18:08 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note = "CERN report 92-07.",
  acknowledgement = ack-nhfb,
  pubcountry = "Switzerland",
}
@Proceedings{Anonymous:1993:ATA,
  editor = {Anonymous},
  booktitle = {{Automotive technology and automation: Supercomputer
    applications in the automotive industries: 26th
    International symposium --- September 1993, Aachen,
    Germany}},
  title = {{Automotive technology and automation: Supercomputer
    applications in the automotive industries: 26th
    International symposium --- September 1993, Aachen,
    Germany}},
  publisher = {Automotive Automation Ltd},
  address = {Croydon, UK},
  pages = {????},
  year = {1993},
  ISBN = {0-947719-62-8},
  ISBN-13 = {978-0-947719-62-3},
  LCCN = {????},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = {ISATA --- Proceedings --- 26th},
  acknowledgement = ack-nhfb,
  sponsor = {ISATA. ENEA; Agency: Italy.},
}
@Proceedings{Anonymous:1993:CDP,
  editor = {Anonymous},
  booktitle = {{The commercial dimensions of parallel computing:
    UNICOM seminar --- April 1993, London}},
  title = {{The commercial dimensions of parallel computing:
    UNICOM seminar --- April 1993, London}},
  publisher = {Unicom Seminars Ltd},
  address = {????},
  pages = {????},
  year = {1993},
  ISBN = {????},
  ISBN-13 = {????},
  LCCN = {????},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Anonymous:1993:ISA,
  editor = {Anonymous},
  booktitle = {{International section: Annual conference ---
    September 1993, Gallipoli, Italy}},
  title = {{International section: Annual conference ---
    September 1993, Gallipoli, Italy}},
  publisher = {AICA},
  address = {????},
  pages = {????},
  year = {1993},
  ISBN = {????},
  ISBN-13 = {????},
  LCCN = {????},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = {Atti del Congresso Annuale --- Associazione Italiana
    per l'Informatica ed il Calcolo Automatico 1993},
  acknowledgement = ack-nhfb,
  sponsor = {Italian Association for Informatics and Automatic
    Computation.},
}
@Proceedings{Anonymous:1993:JFI,
  editor = {Anonymous},
  booktitle = {{Joint framework for information technology: Technical
    conference --- March 1993, Keele}},
  title = {{Joint framework for information technology: Technical
    conference --- March 1993, Keele}},
  publisher = {Dept. of Trade and Industry, Information and
    Manufacturing Division},
  address = {London, UK},
  pages = {????},
  year = {1993},
  ISBN = {????},
  ISBN-13 = {????},
  LCCN = {????},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = {JFIT Technical Conference Digest},
  acknowledgement = ack-nhfb,
  sponsor = {Great Britain; Department of Trade and Industry.
    Science and Engineering Research Council.},
}
@Proceedings{Anonymous:1993:PSE,
  editor = {Anonymous},
  booktitle = {{Proceedings. SHARE Europe Anniversary Meeting.
    Client/Server --- the Promise and the Reality: October
    25--28, 1993, the Hague, the Netherlands}},
  title = {{Proceedings. SHARE Europe Anniversary Meeting.
    Client/Server --- the Promise and the Reality: October
    25--28, 1993, the Hague, the Netherlands}},
  publisher = {SHARE Europe},
  address = {Geneva, Switzerland},
  pages = {xxi + 1002},
  year = {1993},
  ISBN = {????},
  ISBN-13 = {????},
  ISSN = {0254-6213},
  LCCN = {????},
  bibdate = {Wed Apr 16 11:45:17 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Anonymous:1993:SEC,
  editor = "Anonymous",
  booktitle = "{Supercomputing Europe '93. Conference Papers}",
  title = "{Supercomputing Europe '93. Conference Papers}",
  publisher = "Royal Dutch Fairs",
  address = "Utrecht, Netherlands",
  pages = "251",
  year = "1993",
  ISBN = "????",
  ISBN-13 = "????",
  LCCN = "????",
  bibdate = "Sun Dec 22 10:17:40 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  classification = "C5220P (Parallel architecture); C5440
    (Multiprocessor systems and techniques); C7300 (Natural
    sciences); C7400 (Engineering)",
  confdate = "22--24 Feb. 1993",
  conflocation = "Utrecht, Netherlands",
  keywords = "Aerospace applications; High Performance Fortran;
    Parallel architectures; Parallel software; Scientific
    applications; Scientific visualisation; Supercomputing
    environments; Workstation clusters",
  pubcountry = "Netherlands",
  thesaurus = "Engineering computing; Natural sciences computing;
    Parallel architectures; Parallel processing; Software
    engineering",
}
@Proceedings{Bhargava:1993:PIW,
  editor = {Bharat Bhargava},
  booktitle = {{Proceedings of the IEEE Workshop on Advances in
    Parallel and Distributed Systems, October 6, 1993,
    Princeton, New Jersey}},
  title = {{Proceedings of the IEEE Workshop on Advances in
    Parallel and Distributed Systems, October 6, 1993,
    Princeton, New Jersey}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {viii + 170},
  year = {1993},
  ISBN = {0-8186-5250-0, 0-8186-5251-9},
  ISBN-13 = {978-0-8186-5250-9, 978-0-8186-5251-6},
  LCCN = {QA76.58.I444 1993},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  sponsor = {IEEE. Computer Society. Technical Committee on
    Distributed Processing.},
}
@Proceedings{Brebbia:1993:ASE,
  editor = "C. A. Brebbia and H. Power",
  booktitle = "{Applications of Supercomputers in Engineering III,
    27--29 September 1993, Bath, UK}",
  title = "{Applications of Supercomputers in Engineering III,
    27--29 September 1993, Bath, UK}",
  publisher = "Computational Mechanics Publications",
  address = "London, UK",
  pages = "561",
  year = "1993",
  ISBN = "1-85312-236-X",
  ISBN-13 = "978-1-85312-236-1",
  LCCN = "TA345.I556 1993",
  bibdate = "Sun Dec 22 10:19:23 MST 1996",
  bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Proceedings{Gawman:1993:PCT,
  editor = {Ann Gawman and W. Morven Gentleman and E. Kidd and
    Per-{\AA}ke Larson and J. Slonim},
  booktitle = {{Proceedings CASCON '93: Toronto, Ontario, Canada,
    24--28 October 1993}},
  title = {{Proceedings CASCON '93: Toronto, Ontario, Canada,
    24--28 October 1993}},
  publisher = {Nat. Res. Council of Canada},
  address = {Ottawa, Ont., Canada},
  pages = {xx + 1180},
  year = {1993},
  ISBN = {????},
  ISBN-13 = {????},
  LCCN = {QA76.76.S64 C378 1993 v.1-2},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {Two volumes.},
  acknowledgement = ack-nhfb,
  pubcountry = {Canada},
}
@Proceedings{Grebe:1993:TAS,
  editor = {R. Grebe and J. Hektor and S. C. Hilton and M. R. Jane
    and P. H. Welch},
  booktitle = {{Transputer applications and systems '93: proceedings
    of the 1993 World Transputer Congress, 20--22 September
    1993, Aachen, Germany}},
  title = {{Transputer applications and systems '93: proceedings
    of the 1993 World Transputer Congress, 20--22 September
    1993, Aachen, Germany}},
  publisher = pub-IOS,
  address = pub-IOS:adr,
  pages = {1317},
  year = {1993},
  ISBN = {90-5199-140-1},
  ISBN-13 = {978-90-5199-140-6},
  LCCN = {????},
  bibdate = {Wed Apr 16 11:39:32 1997},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  confdate = {20--22 Sept. 1993},
  conflocation = {Aachen, Germany},
  pubcountry = {Netherlands},
}
@Proceedings{Hoffmann:1993:PFE,
  editor = {Geerd-R. Hoffmann and Tuomo Kauranne},
  booktitle = {{Proceedings of the Fifth ECMWF Workshop on the Use of
    Parallel Processors in Meteorology. Parallel
    Supercomputing in Atmospheric Science}},
  title = {{Proceedings of the Fifth ECMWF Workshop on the Use of
    Parallel Processors in Meteorology. Parallel
    Supercomputing in Atmospheric Science}},
  publisher = pub-WORLD-SCI,
  address = pub-WORLD-SCI:adr,
  pages = {ix + 532},
  year = {1993},
  ISBN = {981-02-1429-4},
  ISBN-13 = {978-981-02-1429-6},
  LCCN = {QA76.58 E354 1992},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  confdate = {23--27 Nov. 1992},
  conflocation = {Reading, UK},
  pubcountry = {Singapore},
}
@Proceedings{IEEE:1993:DPC,
  editor = {{IEEE}},
  booktitle = {{Digest of papers: Compcon spring '93, San Francisco,
    California, February 22--26, 1993}},
  title = {{Digest of papers: Compcon spring '93, San Francisco,
    California, February 22--26, 1993}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xv + 609},
  year = {1993},
  ISBN = {0-8186-3400-6},
  ISBN-13 = {978-0-8186-3400-0},
  LCCN = {QA75.5.C58 1993},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {IEEE catalog no. 93CH3251-6.},
  acknowledgement = ack-nhfb,
}
@Proceedings{IEEE:1993:PFW,
  editor = {{IEEE}},
  booktitle = {{Proceedings of the Fourth Workshop on Future Trends
    of Distributed Computing Systems, September 22--24,
    1993, Lisbon, Portugal}},
  title = {{Proceedings of the Fourth Workshop on Future Trends
    of Distributed Computing Systems, September 22--24,
    1993, Lisbon, Portugal}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {x + 485},
  year = {1993},
  ISBN = {0-8186-4430-3},
  ISBN-13 = {978-0-8186-4430-6},
  LCCN = {QA76.9.D5I335 1993},
  bibdate = {Sun Dec 22 10:19:23 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {IEEE catalog no. 93TH0574-4.},
  acknowledgement = ack-nhfb,
  confsponsor = {IEEE Comput. Soc. Tech. Committee on Distributed
    Process},
}
@Proceedings{IEEE:1993:PIS,
  editor = {{IEEE}},
  booktitle = {{Proceedings of the 2nd International Symposium on
    High Performance Distributed Computing, July 20--23,
    1993, Spokane, Washington, Cavanaugh's Inn at the
    Park}},
  title = {{Proceedings of the 2nd International Symposium on
    High Performance Distributed Computing, July 20--23,
    1993, Spokane, Washington, Cavanaugh's Inn at the
    Park}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xiv + 353},
  year = {1993},
  ISBN = {0-8186-3900-8, 0-8186-3901-6},
  ISBN-13 = {978-0-8186-3900-5, 978-0-8186-3901-2},
  LCCN = {QA76.9.D5I593 1993},
  bibdate = {Thu Feb 29 17:59:11 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {IEEE catalog no. 93TH0550-4.},
  series = {Proceedings of the International Symposium on High
    Performance Distributed Computing 2nd},
  acknowledgement = ack-nhfb,
  sponsor = {IEEE Computer Society. Syracuse University; Northeast
    Parallel Architectures Center. Washington State
    University.},
}
@Proceedings{IEEE:1993:PSI,
  editor = {{IEEE}},
  booktitle = {{Proceedings / Seventh International Parallel
    Processing Symposium, April 13--16, 1993, Newport
    Beach, California}},
  title = {{Proceedings / Seventh International Parallel
    Processing Symposium, April 13--16, 1993, Newport
    Beach, California}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {xx + 858},
  year = {1993},
  ISBN = {0-8186-3442-1},
  ISBN-13 = {978-0-8186-3442-0},
  LCCN = {QA 76.58 I56 1993},
  bibdate = {Sun Dec 22 10:18:08 MST 1996},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {IEEE catalog no. 93TH0513-2.},
  acknowledgement = ack-nhfb,
  confsponsor = {IEEE Comput. Soc.; ACM Sigarch},
}
@Proceedings{IEEE:1993:PSP,
editor = "{IEEE}",
key = "Supercomputing'93",
booktitle = "{Proceedings, Supercomputing '93: Portland, Oregon,
November 15--19, 1993}",
title = "{Proceedings, Supercomputing '93: Portland, Oregon,
November 15--19, 1993}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xxii + 935",
year = "1993",
ISBN = "0-8186-4340-4 (paperback), 0-8186-4341-2 (microfiche),
0-8186-4342-0 (hardback), 0-8186-4346-3 (CD-ROM)",
ISBN-13 = "978-0-8186-4340-8 (paperback), 978-0-8186-4341-5
(microfiche), 978-0-8186-4342-2 (hardback),
978-0-8186-4346-0 (CD-ROM)",
ISSN = "1063-9535",
LCCN = "QA76.5 .S96 1993",
bibdate = "Mon Jan 15 11:06:21 1996",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
classification = "631.1; 722.1; 722.3; 722.4; 723.2; 921.6",
keywords = "Algorithms; Cache coherence; Clustered workstations;
Computer graphics; Computer networks; Computer
programming languages; Data parallel compilers; Data
partitioning; Distributed computer systems; Eigenvalues
and eigenfunctions; Finite element method; Flow
visualization; Fluid mechanics; Linear algebra; Mass
storage; Massively parallel processors; Natural
sciences computing; Parallel languages; Parallel
processing systems; Parallel rendering; Program
compilers; Quantum theory; Scheduling; Sparse matrices;
Supercomputers",
sponsor = "Institute of Electrical and Electronics Engineers;
Computer Society. Association for Computing Machinery;
SIGARCH.",
}
@Proceedings{IEEE:1993:WHP,
editor = "{IEEE}",
key = "WHP'92",
booktitle = "{Workshop on Heterogeneous Processing (1992: Beverly
Hills, Calif.) Proceedings / Workshop on Heterogeneous
Processing, March 23, 1992, Beverly Hills,
California}",
title = "{Workshop on Heterogeneous Processing (1992: Beverly
Hills, Calif.) Proceedings / Workshop on Heterogeneous
Processing, March 23, 1992, Beverly Hills,
California}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "ix + 101",
year = "1993",
ISBN = "0-8186-2702-6",
ISBN-13 = "978-0-8186-2702-6",
LCCN = "QA76.58 .W654 1992",
bibdate = "Tue Jan 16 07:27:01 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Proceedings{Kowalik:1993:SPC,
editor = "Janusz S. Kowalik and Lucio Grandinetti",
booktitle = "{Software for parallel computation: Proceedings of the
NATO Advanced Workshop on Software for Parallel
Computation, held at Cetraro, Cosenza, Italy, June
22--26, 1992}",
title = "{Software for parallel computation: Proceedings of the
NATO Advanced Workshop on Software for Parallel
Computation, held at Cetraro, Cosenza, Italy, June
22--26, 1992}",
volume = "106",
publisher = pub-SV,
address = pub-SV:adr,
pages = "ix + 363",
year = "1993",
ISBN = "3-540-56451-9 (Berlin), 0-387-56451-9 (New York)",
ISBN-13 = "978-3-540-56451-5 (Berlin), 978-0-387-56451-7 (New
York)",
LCCN = "QA76.58 .S629 1993",
bibdate = "Sat Feb 24 09:43:28 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "NATO ASI series. Series F, Computer and systems
sciences",
acknowledgement = ack-nhfb,
}
@Proceedings{Law:1993:EDM,
editor = "K. H. Law and R. E. Fulton and others",
booktitle = "{Engineering data management: key to success in a
global market: proceedings of the 1993 ASME
International Computers in Engineering Conference and
Exposition, August 8--12, San Diego, California}",
title = "{Engineering data management: key to success in a
global market: proceedings of the 1993 ASME
International Computers in Engineering Conference and
Exposition, August 8--12, San Diego, California}",
publisher = pub-ASME,
address = pub-ASME:adr,
pages = "vi + 273",
year = "1993",
ISBN = "0-7918-1169-7",
ISBN-13 = "978-0-7918-1169-6",
LCCN = "TA345.A86 1993",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "COMPUTERS IN ENGINEERING VOL COM",
acknowledgement = ack-nhfb,
sponsor = "ASME; Computers in Engineering Division.",
}
@Proceedings{Mudge:1993:PTS,
editor = "T. N. Mudge and V. Milutinovic and L. Hunter",
booktitle = "{Proceedings of the Twenty-Sixth Hawaii International
Conference on System Sciences (HICSS-26), held in
Wailea, Hawaii in January 5--8, 1993}",
title = "{Proceedings of the Twenty-Sixth Hawaii International
Conference on System Sciences (HICSS-26), held in
Wailea, Hawaii in January 5--8, 1993}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xvi + 895 (vol. 1), xiv + 691 (vol. 2), xii + 654
(vol. 3), xv + 889 (vol. 4)",
year = "1993",
ISBN = "0-8186-3230-5",
ISBN-13 = "978-0-8186-3230-3",
LCCN = "????",
bibdate = "Wed Apr 16 11:35:41 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Four volumes. IEEE catalog number 93TH0501-7.",
acknowledgement = ack-nhfb,
}
@Proceedings{Schill:1993:DOD,
editor = "Alexander Schill",
booktitle = "{DCE} --- the {OSF} distributed computing environment:
client\slash server model and beyond: {International
DCE Workshop, Karlsruhe, Germany, October 7--8, 1993:
proceedings}",
title = "{DCE} --- the {OSF} distributed computing environment:
client\slash server model and beyond: {International
DCE Workshop, Karlsruhe, Germany, October 7--8, 1993:
proceedings}",
volume = "731",
publisher = pub-SV,
address = pub-SV:adr,
pages = "283",
year = "1993",
ISBN = "3-540-57306-2, 0-387-57306-2",
ISBN-13 = "978-3-540-57306-7, 978-0-387-57306-9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.9.C55I58 1993",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
acknowledgement = ack-nhfb,
sponsor = "German Association of Computer Science.",
}
@Proceedings{Sincovec:1993:SCP,
editor = "Richard F. Sincovec",
booktitle = "{SIAM Conference on Parallel Processing for Scientific
Computing (6th: 1993: Norfolk, VA, USA)}",
title = "{SIAM Conference on Parallel Processing for Scientific
Computing (6th: 1993: Norfolk, VA, USA)}",
publisher = pub-SIAM,
address = pub-SIAM:adr,
pages = "xix + 1041 + iv",
year = "1993",
ISBN = "0-89871-315-3",
ISBN-13 = "978-0-89871-315-2",
LCCN = "QA 76.58 S55 1993",
bibdate = "Wed Aug 14 10:36:11 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Two volumes.",
acknowledgement = ack-nhfb,
sponsor = "Society for Industrial and Applied Mathematics.",
}
@Proceedings{Volkert:1993:PCS,
editor = "Jens Volkert",
booktitle = "{Parallel computation: Second International ACPC
Conference, Gmunden, Austria, October 4--6, 1993:
proceedings}",
title = "{Parallel computation: Second International ACPC
Conference, Gmunden, Austria, October 4--6, 1993:
proceedings}",
volume = "734",
publisher = pub-SV,
address = pub-SV:adr,
pages = "viii + 248",
year = "1993",
ISBN = "3-540-57314-3 (Berlin), 0-387-57314-3 (New York)",
ISBN-13 = "978-3-540-57314-2 (Berlin), 978-0-387-57314-4 (New
York)",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA267.A1 L43 no.734",
bibdate = "Wed Apr 16 11:41:47 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
price = "DM58.00",
series = ser-LNCS,
acknowledgement = ack-nhfb,
keywords = "parallel processing (electronic computers) --
congresses",
sponsor = "Austrian Center for Parallel Computation.",
}
@Proceedings{Yelon:1993:PTS,
editor = "W. B. Yelon and others",
booktitle = "{Proceedings of the Thirty-seventh Annual Conference
on Magnetism and Magnetic Materials: December 1--4,
1992, Houston, Texas}",
title = "{Proceedings of the Thirty-seventh Annual Conference
on Magnetism and Magnetic Materials: December 1--4,
1992, Houston, Texas}",
volume = "73(10)",
publisher = pub-AIP,
address = pub-AIP:adr,
pages = "5309--7023",
month = may,
year = "1993",
CODEN = "JAPIAU",
ISBN = "1-56396-212-8",
ISBN-13 = "978-1-56396-212-7",
ISSN = "0021-8979 (print), 1089-7550 (electronic), 1520-8850",
LCCN = "QC753 .C748 1990",
bibdate = "Sun Dec 22 10:17:40 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Two volumes.",
series = j-J-APPL-PHYS,
acknowledgement = ack-nhfb,
confsponsor = "AIP; IEEE",
}
@Proceedings{ACM:1994:CPI,
editor = "{ACM}",
booktitle = "{Conference Proceedings. 1994 International Conference
on Supercomputing}",
title = "{Conference Proceedings. 1994 International Conference
on Supercomputing}",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "xii + 439",
year = "1994",
ISBN = "0-89791-665-4",
ISBN-13 = "978-0-89791-665-3",
LCCN = "????",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.acm.org/pubs/contents/proceedings/supercomputing/181181/",
acknowledgement = ack-nhfb,
confdate = "11--15 July 1994",
conflocation = "Manchester, UK",
confsponsor = "ACM",
}
@Proceedings{Agrawal:1994:PIC,
editor = "Dharma P. Agrawal and K. C. (Kuo Chung) Tai and
Jagdish Chandra",
booktitle = "{Proceedings of the 1994 International Conference on
Parallel Processing, August 15--19, 1994. Vol 3:
Algorithms and applications}",
title = "{Proceedings of the 1994 International Conference on
Parallel Processing, August 15--19, 1994. Vol 3:
Algorithms and applications}",
publisher = pub-CRC,
address = pub-CRC:adr,
pages = "xvii + 301 (vol. 1), xviii + 323 (vol. 2), 297 (vol.
3)",
year = "1994",
ISBN = "0-8493-2496-3, 0-8493-2495-5",
ISBN-13 = "978-0-8493-2496-3, 978-0-8493-2495-6",
ISSN = "0190-3918",
LCCN = "QA 76.58 I55 1994",
bibdate = "Wed Aug 14 10:37:00 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Three volumes.",
acknowledgement = ack-nhfb,
}
@Proceedings{Anonymous:1994:FWR,
editor = "Anonymous",
booktitle = "{Forschung und wissenschaftliches Rechnen: Beitr{\"a}ge
anl{\"a}{\ss}lich des 10. EDV-Benutzertreffens der
Max-Planck-Gesellschaft in G{\"o}ttingen, November
1993}",
title = "{Forschung und wissenschaftliches Rechnen: Beitr{\"a}ge
anl{\"a}{\ss}lich des 10. EDV-Benutzertreffens der
Max-Planck-Gesellschaft in G{\"o}ttingen, November
1993}",
number = "1",
publisher = "Max-Planck-Gesellschaft",
address = "M{\"u}nchen, Germany",
pages = "270",
year = "1994",
ISBN = "????",
ISBN-13 = "????",
ISSN = "0341-7778",
LCCN = "Q180.55.E4 M39 1993",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Berichte und Mitteilungen --- Max Planck
Gesellschaft",
acknowledgement = ack-nhfb,
sponsor = "Max-Planck-Gesellschaft.",
}
@Proceedings{Anonymous:1994:ICS,
editor = "Anonymous",
booktitle = "{1994 International Computer Symposium Conference
Proceedings}",
title = "{1994 International Computer Symposium Conference
Proceedings}",
publisher = "Nat. Chiao Tung Univ",
address = "Hsinchu, Taiwan",
pages = "xvi + 1310",
year = "1994",
ISBN = "????",
ISBN-13 = "????",
LCCN = "????",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "2 vol.",
acknowledgement = ack-nhfb,
confdate = "12--15 Dec. 1994",
conflocation = "Hsinchu, Taiwan",
confsponsor = "Ministr. Educ.; Comput. Soc",
pubcountry = "Taiwan",
}
@Proceedings{Anonymous:1994:PDC,
editor = "Anonymous",
booktitle = "{Parallel and distributed computing systems:
proceedings of the ISCA International Conference, Las
Vegas, Nevada, U.S.A., October 6--8, 1994}",
title = "{Parallel and distributed computing systems:
proceedings of the ISCA International Conference, Las
Vegas, Nevada, U.S.A., October 6--8, 1994}",
publisher = "ISCA",
address = "Raleigh, NC, USA",
pages = "x + 870",
year = "1994",
ISBN = "1-880843-09-9",
ISBN-13 = "978-1-880843-09-3",
LCCN = "QA76.58.I543 1994",
bibdate = "Fri Feb 01 06:55:36 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Proceedings{Anonymous:1994:PPC,
editor = "Anonymous",
booktitle = "{Parallel processing comes of age: real applications
from industry and commerce: Seminar --- June 1994,
London}",
title = "{Parallel processing comes of age: real applications
from industry and commerce: Seminar --- June 1994,
London}",
publisher = "Unicom Seminars",
address = "????",
pages = "????",
year = "1994",
ISBN = "????",
ISBN-13 = "????",
LCCN = "????",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
sponsor = "Unicom.",
}
@Proceedings{Anonymous:1994:PSE,
editor = "Anonymous",
booktitle = "{Proceedings. SHARE Europe Spring Conference}",
title = "{Proceedings. SHARE Europe Spring Conference}",
publisher = "SHARE Europe (SEAS)",
address = "Carouge/Geneva, Switzerland",
pages = "xix + 810",
year = "1994",
ISBN = "????",
ISBN-13 = "????",
LCCN = "????",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
confdate = "18--21 April 1994",
conflocation = "Brussels, Belgium",
pubcountry = "Switzerland",
}
@Proceedings{Anonymous:1994:SCC,
editor = "Anonymous",
booktitle = "{Small college computing: 27th Annual symposium ---
April 1994, Winona, MN}",
title = "{Small college computing: 27th Annual symposium ---
April 1994, Winona, MN}",
publisher = "SCCS",
address = "????",
pages = "????",
year = "1994",
ISBN = "????",
ISBN-13 = "????",
LCCN = "????",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "SCCS --- Proceedings --- 27th",
acknowledgement = ack-nhfb,
}
@Proceedings{Anonymous:1994:SQC,
editor = "Anonymous",
booktitle = "{Software quality concern for people: proceedings of
the fourth European Conference on Software Quality,
October 17--20, 1994, Basel, Switzerland}",
title = "{Software quality concern for people: proceedings of
the fourth European Conference on Software Quality,
October 17--20, 1994, Basel, Switzerland}",
publisher = "vdf Verlag der Fachvereine",
address = "Zurich, Switzerland",
pages = "538",
year = "1994",
ISBN = "3-7281-2153-3",
ISBN-13 = "978-3-7281-2153-0",
LCCN = "????",
bibdate = "Wed Apr 16 11:49:47 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Proceedings{Arnold:1994:PCT,
editor = "D. Arnold and R. Christie and J. Day and P. Roe",
booktitle = "{Parallel Computing and Transputers. PCAT-93.
Proceedings of the 6th Australian Transputer and Occam
User Group Conference, November 3--4, 1993, Brisbane,
Queensland, Australia}",
title = "{Parallel Computing and Transputers. PCAT-93.
Proceedings of the 6th Australian Transputer and Occam
User Group Conference, November 3--4, 1993, Brisbane,
Queensland, Australia}",
volume = "37",
publisher = pub-IOS,
address = pub-IOS:adr,
pages = "383",
year = "1994",
ISBN = "90-5199-149-5",
ISBN-13 = "978-90-5199-149-9",
LCCN = "????",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Transputer and Occam Engineering Series",
acknowledgement = ack-nhfb,
pubcountry = "Netherlands",
}
@Proceedings{Becks:1994:NCT,
editor = "K.-H. Becks and D. Perret-Gallix",
booktitle = "{New computing techniques in physics research III:
proceedings of the Third International Workshop on
Software Engineering, Artificial Intelligence and
Expert Systems for High Energy and Nuclear Physics:
October 4--8, 1993, Oberammergau, Germany}",
title = "{New computing techniques in physics research III:
proceedings of the Third International Workshop on
Software Engineering, Artificial Intelligence and
Expert Systems for High Energy and Nuclear Physics:
October 4--8, 1993, Oberammergau, Germany}",
publisher = pub-WORLD-SCI,
address = pub-WORLD-SCI:adr,
pages = "xvii + 664",
year = "1994",
ISBN = "981-02-1699-8",
ISBN-13 = "978-981-02-1699-3",
LCCN = "QC793.47.E4I58 1993",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
pubcountry = "Singapore",
}
@Proceedings{Bolding:1994:PCR,
editor = "Kevin Bolding and Lawrence Snyder",
booktitle = "{Parallel computer routing and communication: first
international workshop, PCRCW '94, Seattle, Washington,
USA, May 16--18, 1994: proceedings}",
title = "{Parallel computer routing and communication: first
international workshop, PCRCW '94, Seattle, Washington,
USA, May 16--18, 1994: proceedings}",
volume = "853",
publisher = pub-SV,
address = pub-SV:adr,
pages = "ix + 317",
year = "1994",
ISBN = "3-540-58429-3",
ISBN-13 = "978-3-540-58429-2",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.58.P39 1994",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
acknowledgement = ack-nhfb,
conflocation = "Seattle, WA, USA; 16--18 May 1994",
corpsource = "Dept. of Comput. Sci. and Eng., Washington Univ.,
Seattle, WA, USA",
pubcountry = "Germany",
treatment = "P Practical",
}
@Proceedings{Calmet:1994:RWC,
editor = "J. Calmet",
booktitle = "{Rhine workshop on computer algebra --- March 22--24,
1994, Karlsruhe, Germany}",
title = "{Rhine workshop on computer algebra --- March 22--24,
1994, Karlsruhe, Germany}",
publisher = "Universit{\"a}t Karlsruhe",
address = "Karlsruhe, Germany",
pages = "v + 224",
year = "1994",
ISBN = "????",
ISBN-13 = "????",
LCCN = "????",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
sponsor = "University of Karlsruhe. Faculty of Informatics.
Institute of Algorithms and Cognitive Systems.",
}
@Proceedings{Davidor:1994:PPS,
editor = "Yuval Davidor and Hans-Paul Schwefel and Reinhard
M{\"a}nner",
booktitle = "{Parallel problem solving from nature --- PPSN III:
International Conference on Evolutionary Computation,
the Third Conference on Parallel Problem Solving from
Nature, Jerusalem, Israel, October 9--14, 1994:
proceedings}",
title = "{Parallel problem solving from nature --- PPSN III:
International Conference on Evolutionary Computation,
the Third Conference on Parallel Problem Solving from
Nature, Jerusalem, Israel, October 9--14, 1994:
proceedings}",
volume = "866",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xv + 642",
year = "1994",
ISBN = "3-540-58484-6",
ISBN-13 = "978-3-540-58484-1",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.58 .I535 1994",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
acknowledgement = ack-nhfb,
}
@Proceedings{Decker:1994:PEM,
editor = "K. M. (Karsten M.) Decker and R. M. (Rene M.)
Rehmann",
booktitle = "{Programming environments for massively parallel
distributed systems: working conference of the IFIP
WG10.3, April 25--29, 1994, Ascona, Switzerland}",
title = "{Programming environments for massively parallel
distributed systems: working conference of the IFIP
WG10.3, April 25--29, 1994, Ascona, Switzerland}",
publisher = pub-BIRKHAUSER,
address = pub-BIRKHAUSER:adr,
pages = "xiv + 420",
year = "1994",
ISBN = "0-8176-5090-3 (Boston), 3-7643-5090-3 (Basel)",
ISBN-13 = "978-0-8176-5090-2 (Boston), 978-3-7643-5090-1
(Basel)",
LCCN = "QA76.58.P767 1994",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
sponsor = "IFIP WG10.3.",
}
@Proceedings{deGloria:1994:TAS,
editor = "A. de Gloria and M. R. Jane and D. Marini",
booktitle = "{Transputer Applications and Systems '94. Proceedings
of the 1994 World Transputer Congress}",
title = "{Transputer Applications and Systems '94. Proceedings
of the 1994 World Transputer Congress}",
publisher = pub-IOS,
address = pub-IOS:adr,
pages = "xi + 1009",
year = "1994",
ISBN = "????",
ISBN-13 = "????",
LCCN = "????",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
confdate = "5--7 Sept. 1994",
conflocation = "Como, Italy",
confsponsor = "Transputer Consortium; SGS-Thomson Microelectron.;
Eur. Union; Italian Transputer User Group",
pubcountry = "Netherlands",
}
@Proceedings{Dekker:1994:MPP,
editor = "L. (Leendert) Dekker and W. Smit and J. C.
Zuidervaart",
booktitle = "{Massively parallel processing applications and
development: proceedings of the 1994 EUROSIM Conference
on Massively Parallel Processing Applications and
Development, Delft, The Netherlands, 21--23 June
1994}",
title = "{Massively parallel processing applications and
development: proceedings of the 1994 EUROSIM Conference
on Massively Parallel Processing Applications and
Development, Delft, The Netherlands, 21--23 June
1994}",
publisher = pub-ELS,
address = pub-ELS:adr,
pages = "xxii + 973",
year = "1994",
ISBN = "0-444-81784-0",
ISBN-13 = "978-0-444-81784-6",
LCCN = "QA76.58.E98 1994",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
confsponsor = "AKZO NOBEL; BSO; Convex Comput.; HPCN projects; IBM;
NOWESP; et al",
pubcountry = "Netherlands",
}
@Proceedings{Dongarra:1994:PSC,
editor = "Jack Dongarra and Jerzy Wa{\'s}niewski",
booktitle = "{Parallel scientific computing: First International
Workshop, PARA '94, Lyngby, Denmark, June 20--23, 1994:
proceedings}",
title = "{Parallel scientific computing: First International
Workshop, PARA '94, Lyngby, Denmark, June 20--23, 1994:
proceedings}",
volume = "879",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xi + 566",
year = "1994",
ISBN = "3-540-58712-8 (Berlin), 0-387-58712-8 (New York)",
ISBN-13 = "978-3-540-58712-5 (Berlin), 978-0-387-58712-7 (New
York)",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.58 .P35 1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
price = "DM104.00",
series = ser-LNCS,
acknowledgement = ack-nhfb,
confsponsor = "Danish Comput. Centre for Res. and Educ.; Inst. Math.
Modelling; Tech. Univ. Denmark",
pubcountry = "Germany",
sponsor = "Danish Computing Centre for Research and Education.
Technical University of Denmark; Institute for
Mathematical Modelling.",
}
@Proceedings{Dongarra:1994:PSW,
editor = "Jack J. Dongarra and Bernard Tourancheau",
booktitle = "{Proceedings of the Second Workshop on Environments
and Tools for Parallel Scientific Computing: Townsend,
TN, USA, 25--27 May 1994}",
title = "{Proceedings of the Second Workshop on Environments
and Tools for Parallel Scientific Computing: Townsend,
TN, USA, 25--27 May 1994}",
publisher = pub-SIAM,
address = pub-SIAM:adr,
pages = "x + 292",
year = "1994",
ISBN = "0-89871-343-9",
ISBN-13 = "978-0-89871-343-5",
LCCN = "QA76.58.I568 1994",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
conflocation = "Townsend, TN, USA; 25--27 May 1994",
conftitle = "Proceedings of the Second Workshop on Environments and
Tools for Parallel Scientific Computing",
corpsource = "IBM Thomas J. Watson Res. Center, Yorktown Heights,
NY, USA",
treatment = "P Practical",
}
@Proceedings{Gentzsch:1994:HPC,
editor = "Wolfgang Gentzsch and Uwe Harms",
booktitle = "{High-performance computing and networking:
international conference and exhibition, Munich,
Germany, April 18--20, 1994: proceedings}",
title = "{High-performance computing and networking:
international conference and exhibition, Munich,
Germany, April 18--20, 1994: proceedings}",
volume = "797",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xxii + 519",
year = "1994",
ISBN = "0-387-57981-8 (New York), 3-540-57981-8 (Berlin)",
ISBN-13 = "978-0-387-57981-8 (New York), 978-3-540-57981-6
(Berlin)",
LCCN = "QA76.88.I57 1994",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Two volumes.",
price = "DM96.00",
series = ser-LNCS,
acknowledgement = ack-nhfb,
conftitle = "High-Performance Computing and Networking
International Conference. Proceedings, Volume II:
Networking and Tools",
corpsource = "German Nat. Res. Center for Comput. Sci., St.
Augustin, Germany",
pubcountry = "Germany",
treatment = "P Practical",
}
@Proceedings{Gruber:1994:PJE,
editor = "Ralf Gruber and Marco Tomassini",
booktitle = "{Proceedings of the 6th Joint EPS-APS International
Conference on Physics Computing: Physics Computing '94,
Palazzo dei Congressi, Lugano, Switzerland, 22--26
August 1994}",
title = "{Proceedings of the 6th Joint EPS-APS International
Conference on Physics Computing: Physics Computing '94,
Palazzo dei Congressi, Lugano, Switzerland, 22--26
August 1994}",
publisher = "European Physical Society",
address = "Geneva, Switzerland",
pages = "xvii + 730",
year = "1994",
ISBN = "2-88270-011-3",
ISBN-13 = "978-2-88270-011-7",
LCCN = "QC20.7.E4I58 1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
pubcountry = "Switzerland",
}
@Proceedings{Hesham:1994:PTS,
editor = "Hesham El-Rewini and B. D. Shriver",
booktitle = "{Proceedings of the Twenty-Seventh Hawaii
International Conference on System Sciences. Vol. II:
Software Technology, January 4--7, 1994, Wailea, HI,
USA}",
title = "{Proceedings of the Twenty-Seventh Hawaii
International Conference on System Sciences. Vol. II:
Software Technology, January 4--7, 1994, Wailea, HI,
USA}",
volume = "27",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xv + 681",
year = "1994",
ISBN = "0-8186-5060-5",
ISBN-13 = "978-0-8186-5060-4",
ISSN = "1060-3425",
LCCN = "????",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog no. 94TH0607-2.",
acknowledgement = ack-nhfb,
confsponsor = "IEEE; ACM; Univ. Hawaii; Univ. Hawaii Coll. Bus.
Admin",
}
@Proceedings{Horiguchi:1994:ISP,
editor = "S. Horiguchi and D. Frank Hsu and M. Kimura",
booktitle = "{International Symposium on Parallel Architectures,
Algorithms, and Networks (ISPAN): proceedings of the
1994, December 14--16, 1994, Kanazawa, Japan}",
title = "{International Symposium on Parallel Architectures,
Algorithms, and Networks (ISPAN): proceedings of the
1994, December 14--16, 1994, Kanazawa, Japan}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xi + 452",
year = "1994",
ISBN = "0-8186-6507-6 (case), 0-8186-6506-8 (microfiche)",
ISBN-13 = "978-0-8186-6507-3 (case), 978-0-8186-6506-6
(microfiche)",
LCCN = "QA76.58 .I5673 1994 Bar",
bibdate = "Wed Apr 16 07:34:31 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog number 94TH0697-3.",
acknowledgement = ack-nhfb,
keywords = "parallel processing (electronic computers) --
congresses",
}
@Proceedings{IEEE:1994:FSF,
editor = "{IEEE}",
booktitle = "{Frontiers'95, the 5th Symposium on the Frontiers of
Massively Parallel Computation: proceedings, February
6--9, 1995, McLean, Virginia}",
title = "{Frontiers'95, the 5th Symposium on the Frontiers of
Massively Parallel Computation: proceedings, February
6--9, 1995, McLean, Virginia}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xvi + 539",
year = "1994",
ISBN = "0-8186-6965-9",
ISBN-13 = "978-0-8186-6965-1",
LCCN = "QA76.58.S95 1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog no. 95TH8024.",
acknowledgement = ack-nhfb,
confsponsor = "IEEE Comput. Soc. Tech. Committee on Comput. Archit.;
NASA; Univ. Maryland Inst. Adv. Comput. Studies; George
Mason Univ",
}
@Proceedings{IEEE:1994:IPN,
editor = "{IEEE}",
booktitle = "{ICIP '94: proceedings, November 13--16, 1994, Austin
Convention Center, Austin, Texas}",
title = "{ICIP '94: proceedings, November 13--16, 1994, Austin
Convention Center, Austin, Texas}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "liii + 992 (vol. 1), 1064 (vol. 2), 1050 (vol. 3)",
year = "1994",
ISBN = "0-8186-6952-7 (casebound), 0-8186-6950-0 (paperback),
0-8186-6951-9 (microfiche)",
ISBN-13 = "978-0-8186-6952-1 (casebound), 978-0-8186-6950-7
(paperback), 978-0-8186-6951-4 (microfiche)",
LCCN = "TA1637.I25 1994",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Three volumes. IEEE catalog no. 94CH35708.",
acknowledgement = ack-nhfb,
confsponsor = "IEEE Signal Process. Soc",
}
@Proceedings{IEEE:1994:OOE,
editor = "{IEEE}",
booktitle = "{Oceans 94: Oceans engineering for today's technology
and tomorrow's preservation: proceedings, 13--16
September 1994, Brest, France}",
title = "{Oceans 94: Oceans engineering for today's technology
and tomorrow's preservation: proceedings, 13--16
September 1994, Brest, France}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xl + 905 (vol. 1), xl + 727 (vol. 2), xl + 630 (vol.
3)",
year = "1994",
ISBN = "0-7803-2057-3, 0-7803-2056-5, 0-7803-2058-1",
ISBN-13 = "978-0-7803-2057-4, 978-0-7803-2056-7,
978-0-7803-2058-1",
ISSN = "0197-7385",
LCCN = "TC 1505 O33197 1994",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Three volumes. IEEE catalog no. 94CH3472-8.",
series = "Oceans",
acknowledgement = ack-nhfb,
sponsor = "IEEE; Ocean Engineering Society.",
}
@Proceedings{IEEE:1994:PIF,
editor = "{IEEE}",
booktitle = "{Proceedings of the 1994 IEEE Frequency Control
Symposium (the 48th annual symposium), 1--3 June 1994,
Westin Hotel-Copley Place, Boston, Massachusetts,
USA}",
title = "{Proceedings of the 1994 IEEE Frequency Control
Symposium (the 48th annual symposium), 1--3 June 1994,
Westin Hotel-Copley Place, Boston, Massachusetts,
USA}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xvii + 817",
year = "1994",
ISBN = "0-7803-1945-1",
ISBN-13 = "978-0-7803-1945-5",
LCCN = "TK 7872 O7 I34 1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog no. 94CH3446-2.",
acknowledgement = ack-nhfb,
confsponsor = "IEEE Ultrasonics, Ferroelectr. and Frequency Control
Soc",
numericalindex = "Frequency 1.0E+09 to 2.0E+09 Hz",
}
@Proceedings{IEEE:1994:PSI,
editor = "{IEEE}",
booktitle = "{Proceedings / Second International Workshop on
Configurable Distributed Systems, March 21--23, 1994,
Carnegie Mellon University, Pittsburgh, Pennsylvania}",
title = "{Proceedings / Second International Workshop on
Configurable Distributed Systems, March 21--23, 1994,
Carnegie Mellon University, Pittsburgh, Pennsylvania}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "ix + 215",
year = "1994",
ISBN = "0-8186-5390-6",
ISBN-13 = "978-0-8186-5390-2",
LCCN = "QA76.9.D5I595 1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog no. 94TH0651-0.",
acknowledgement = ack-nhfb,
confsponsor = "IEEE; Carnegie Mellon Univ",
}
@Proceedings{IEEE:1994:PSP,
editor = "{IEEE}",
booktitle = "{Proceedings of the Scalable Parallel Libraries
Conference, October 6--8, 1993, Mississippi State,
Mississippi}",
title = "{Proceedings of the Scalable Parallel Libraries
Conference, October 6--8, 1993, Mississippi State,
Mississippi}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "vii + 261",
year = "1994",
ISBN = "0-8186-4980-1",
ISBN-13 = "978-0-8186-4980-6",
LCCN = "QA76.58.S34 1993",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
conflocation = "Mississippi State, MS, USA; 6--8 Oct. 1993",
confsponsor = "Mississippi State Univ.; Nat. Sci. Found",
conftitle = "Proceedings of Scalable Parallel Libraries
Conference",
corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
sponsororg = "Mississippi State Univ.; Nat. Sci. Found",
treatment = "P Practical",
}
@Proceedings{IEEE:1994:PSW,
editor = "{IEEE}",
booktitle = "{Proceedings, Supercomputing '94: Washington, DC,
November 14--18, 1994}",
title = "{Proceedings, Supercomputing '94: Washington, DC,
November 14--18, 1994}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xvii + 823",
year = "1994",
ISBN = "0-8186-6607-2, 0-8186-6605-6, 0-8186-6606-4",
ISBN-13 = "978-0-8186-6607-0, 978-0-8186-6605-6,
978-0-8186-6606-3",
ISSN = "1063-9535",
LCCN = "QA76.5 .S894 1994",
bibdate = "Mon Aug 26 10:38:41 MDT 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog number 94CH34819.",
series = "Supercomputing",
acknowledgement = ack-nhfb,
sponsor = "IEEE.",
}
@Proceedings{IEEE:1994:PTI,
editor = "{IEEE}",
booktitle = "{Proceedings of the Third IEEE International Symposium
on High Performance Distributed Computing, August 2--5,
1994, San Francisco, California}",
title = "{Proceedings of the Third IEEE International Symposium
on High Performance Distributed Computing, August 2--5,
1994, San Francisco, California}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xiii + 304",
year = "1994",
ISBN = "0-8186-6395-2",
ISBN-13 = "978-0-8186-6395-6",
LCCN = "QA76.9.D5I328 1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog no. 94TH0667-6.",
acknowledgement = ack-nhfb,
confsponsor = "IEEE Comput. Soc. Tech. Committee on Distributed
Process.; Northeast Parallel Archit. Center (NPAC) at
Syracuse Univ.; ACM SIGCOMM",
}
@Proceedings{Joubert:1994:PCT,
editor = "G. R. Joubert and F. J. Peters and D. Trystram and D.
J. Evans",
booktitle = "{Parallel computing: trends and applications:
proceedings of the international conference ParCo93,
Grenoble, France, 7--10 September 1993}",
title = "{Parallel computing: trends and applications:
proceedings of the international conference ParCo93,
Grenoble, France, 7--10 September 1993}",
volume = "9",
publisher = pub-NH,
address = pub-NH:adr,
pages = "xvi + 728",
year = "1994",
ISBN = "0-444-81841-3",
ISBN-13 = "978-0-444-81841-6",
LCCN = "QA76.58 .P3794 1993",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Advances in Parallel Computing",
acknowledgement = ack-nhfb,
confsponsor = "ARCHIPEL; CNRS; Elsevier Sci. Publishers; IMAG; INPG;
INRIA; et al",
pubcountry = "Netherlands",
xxeditor = "G. R. Joubert and D. Trystram and F. J. Peters and D.
J. Evans",
}
@Proceedings{Kumar:1994:PPI,
editor = "V. K. Prasanna Kumar",
booktitle = "{Parallel processing: 1st IWPP: proceedings of the
First International Workshop on Parallel Processing
(IWPP-94), December 26--31, 1994, Bangalore, India}",
title = "{Parallel processing: 1st IWPP: proceedings of the
First International Workshop on Parallel Processing
(IWPP-94), December 26--31, 1994, Bangalore, India}",
publisher = "Tata McGraw-Hill Pub. Co",
address = "New Delhi, India",
pages = "xxiii + 736",
year = "1994",
ISBN = "0-07-462332-X",
ISBN-13 = "978-0-07-462332-9",
LCCN = "QA 76.58 I587 1994",
bibdate = "Tue May 12 08:53:36 1998",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Proceedings{Miles:1994:PTO,
editor = "Roger Miles and Alan Chalmers",
booktitle = "{Progress in Transputer and occam Research, WoTUG-17
Proceedings of the 17th World occam and Transputer User
Group Technical Meeting, April 10--13, 1994, Bristol,
UK}",
title = "{Progress in Transputer and occam Research, WoTUG-17
Proceedings of the 17th World occam and Transputer User
Group Technical Meeting, April 10--13, 1994, Bristol,
UK}",
volume = "38",
publisher = pub-IOS,
address = pub-IOS:adr,
pages = "vii + 221",
year = "1994",
ISBN = "90-5199-163-0",
ISBN-13 = "978-90-5199-163-5",
LCCN = "????",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Transputer and Occam Engineering Series",
acknowledgement = ack-nhfb,
pubcountry = "Netherlands",
sponsor = "World occam and Transputer User Group.",
}
@Proceedings{Ostrand:1994:PIS,
editor = "Thomas Ostrand",
booktitle = "{Proceedings of the 1994 International Symposium on
Software Testing and Analysis (ISSTA): August 17--19,
1994, Seattle, Washington, USA}",
title = "{Proceedings of the 1994 International Symposium on
Software Testing and Analysis (ISSTA): August 17--19,
1994, Seattle, Washington, USA}",
publisher = pub-ACM,
address = pub-ACM:adr,
year = "1994",
CODEN = "SFENDP",
ISBN = "0-89791-683-2",
ISBN-13 = "978-0-89791-683-7",
ISSN = "0163-5948",
LCCN = "QA76.76.T48 I58 1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
http://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
series = j-SIGSOFT,
acknowledgement = ack-nhfb,
fjournal = "ACM SIGSOFT Software Engineering Notes",
issue = "spec. issue. p. 216-227",
journal-URL = "https://dl.acm.org/citation.cfm?id=J728",
}
@Proceedings{Pehrson:1994:IPP,
editor = "Bj{\"o}rn Pehrson and Imre Simon and Klaus Brunnstein
and Eckart Raubold and Karen Duncan and Karl Krueger",
booktitle = "{Information processing '94: proceedings of the IFIP
13th World Computer Congress, Hamburg, Germany, 28
August--2 September, 1994}",
title = "{Information processing '94: proceedings of the IFIP
13th World Computer Congress, Hamburg, Germany, 28
August--2 September, 1994}",
volume = "A-51, A-52, A-53",
publisher = pub-NH,
address = pub-NH:adr,
pages = "402--409",
year = "1994",
CODEN = "ITATEC",
ISBN = "0-444-81990-8, 0-444-81989-4",
ISBN-13 = "978-0-444-81990-1, 978-0-444-81989-5",
ISSN = "0926-5473",
LCCN = "QA75.5.I3785 1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Three volumes.",
series = j-IFIP-TRANS-A,
acknowledgement = ack-nhfb,
pubcountry = "Netherlands",
sponsor = "IFIP. Gesellschaft f{\"u}r Informatik.",
}
@Proceedings{Pierce:1994:PSH,
editor = "P. Pierce and G. Regnier",
booktitle = "{Proceedings of the Scalable High-Per\-for\-mance
Computing Conference, May 23--25, 1994, Knoxville,
Tennessee}",
title = "{Proceedings of the Scalable High-Per\-for\-mance
Computing Conference, May 23--25, 1994, Knoxville,
Tennessee}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xviii + 852",
year = "1994",
ISBN = "0-8186-5680-8, 0-8186-5681-6",
ISBN-13 = "978-0-8186-5680-4, 978-0-8186-5681-1",
LCCN = "QA76.58.S32 1994",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog no. 94TH0637-9.",
acknowledgement = ack-nhfb,
sponsor = "IEEE Computer Society; Technical Committee on
Supercomputing Applications.",
}
@Proceedings{Sall:1994:CIS,
editor = "J. Sall and A. Lehman",
booktitle = "{Computational intensive statistical methods: 26th
Symposium on the interface --- June 15--18, 1994,
Research Triangle Park, NC, USA}",
title = "{Computational intensive statistical methods: 26th
Symposium on the interface --- June 15--18, 1994,
Research Triangle Park, NC, USA}",
volume = "26",
publisher = "Interface Foundation of North America",
address = "Fairfax Station, VA, USA",
pages = "????",
year = "1994",
ISBN = "1-886658-00-5",
ISBN-13 = "978-1-886658-00-4",
LCCN = "QA276.4.S95 1994",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Computing Science and Statistics Conference",
acknowledgement = ack-nhfb,
sponsor = "Interface Foundation of North America.",
}
@Proceedings{Siegal:1994:PEI,
editor = "Howard Jay Siegel",
booktitle = "{Proceedings / Eighth International Parallel
Processing Symposium, April 26--29, 1994, Cancun,
Mexico}",
title = "{Proceedings / Eighth International Parallel
Processing Symposium, April 26--29, 1994, Cancun,
Mexico}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xxx + 966",
year = "1994",
ISBN = "0-8186-5602-6",
ISBN-13 = "978-0-8186-5602-6",
LCCN = "QA76.58.I58 1994",
bibdate = "Sun Dec 22 10:18:08 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog no. 94TH0652-8.",
acknowledgement = ack-nhfb,
confsponsor = "IEEE; ACM",
}
@Proceedings{Turchi:1994:SDA,
editor = "Patrice E. A. Turchi and Antonios Gonis",
booktitle = "{Statics and dynamics of alloy phase transformations:
Proceedings of a NATO Advanced Study Institute on
Statics and Dynamics of Alloy Phase Transformations,
held June 21--July 3, 1992, in Rhodes, Greece}",
title = "{Statics and dynamics of alloy phase transformations:
Proceedings of a NATO Advanced Study Institute on
Statics and Dynamics of Alloy Phase Transformations,
held June 21--July 3, 1992, in Rhodes, Greece}",
volume = "319",
publisher = pub-PLENUM,
address = pub-PLENUM:adr,
pages = "xiii + 737",
year = "1994",
ISBN = "0-306-44626-X",
ISBN-13 = "978-0-306-44626-9",
ISSN = "0258-1221",
LCCN = "TN690.S77 1994",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "NATO ASI Series B Physics",
acknowledgement = ack-nhfb,
}
@Proceedings{USENIX:1994:PFU,
editor = "{USENIX}",
booktitle = "{Proceedings of the First USENIX Symposium on
Operating Systems Design and Implementation (OSDI),
November 14--17, 1994, Monterey, California, USA}",
title = "{Proceedings of the First USENIX Symposium on
Operating Systems Design and Implementation (OSDI),
November 14--17, 1994, Monterey, California, USA}",
publisher = pub-USENIX,
address = pub-USENIX:adr,
pages = "280",
year = "1994",
ISBN = "1-880446-66-9",
ISBN-13 = "978-1-880446-66-9",
LCCN = "QA 76.76 O63 U87 1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
confsponsor = "ACM; IEEE",
}
@Proceedings{Wagner:1994:CFD,
editor = "Siegfried Wagner and Jacques Periaux and Ernst-Heinrich
Hirschel",
booktitle = "{Computational fluid dynamics '94: proceedings of the
Second European Computational Fluid Dynamics
Conference, 5--8 September 1994, Stuttgart, Germany}",
title = "{Computational fluid dynamics '94: proceedings of the
Second European Computational Fluid Dynamics
Conference, 5--8 September 1994, Stuttgart, Germany}",
publisher = pub-WILEY,
address = pub-WILEY:adr,
pages = "xvi + 1029",
year = "1994",
ISBN = "0-471-95063-7",
ISBN-13 = "978-0-471-95063-9",
LCCN = "QA911.E95 1994",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
sponsor = "European Committee on Computational Methods in Applied
Sciences.",
}
@Proceedings{ACM:1995:PAS,
editor = "{ACM}",
booktitle = "{Proceedings of the 33rd annual southeast conference
[ACM]: Clemson, South Carolina, March 17--18, 1995}",
title = "{Proceedings of the 33rd annual southeast conference
[ACM]: Clemson, South Carolina, March 17--18, 1995}",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "290",
year = "1995",
ISBN = "0-89791-747-2",
ISBN-13 = "978-0-89791-747-6",
LCCN = "????",
bibdate = "Wed Apr 16 13:28:48 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
confdate = "17--18 March 1995",
conflocation = "Clemson, SC, USA",
confsponsor = "ACM",
}
@Proceedings{ACM:1995:SAA,
editor = "{ACM}",
booktitle = "{SPAA '95, 7th Annual ACM Symposium on Parallel
Algorithms and Architectures: July 17--19, 1995, Santa
Barbara, CA, USA}",
title = "{SPAA '95, 7th Annual ACM Symposium on Parallel
Algorithms and Architectures: July 17--19, 1995, Santa
Barbara, CA, USA}",
volume = "7",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "viii + 308",
year = "1995",
ISBN = "0-89791-717-0",
ISBN-13 = "978-0-89791-717-9",
LCCN = "QA76.642 .A25 1995",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
conflocation = "Santa Barbara, CA, USA; 17-19 July 1995",
conftitle = "Proceedings of Seventh Annual ACM Symposium on
Parallel Algorithms and Architectures",
corpsource = "California Inst. of Technol., Pasadena, CA, USA",
sponsor = "ACM. Special Interest Group on Algorithms and
Computation Theory; ACM. Special Interest Group on
Computer Architecture Theory; ACM. Special Interest
Group on Computer Architecture; European Association for
Theoretical Computer Science.",
sponsororg = "ACM; EATCS",
treatment = "P Practical",
}
@Proceedings{Agrawal:1995:PIW,
editor = "D. P. Agrawal",
booktitle = "{Proceedings of the 1995 ICPP Workshop on Challenges
for Parallel Processing, August 14, 1995, Raleigh, NC,
USA}",
title = "{Proceedings of the 1995 ICPP Workshop on Challenges
for Parallel Processing, August 14, 1995, Raleigh, NC,
USA}",
publisher = pub-CRC,
address = pub-CRC:adr,
pages = "vi + 162",
year = "1995",
ISBN = "0-8493-2618-4",
ISBN-13 = "978-0-8493-2618-9",
LCCN = "QA76.58.I34 1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
confsponsor = "Pennsylvania State Univ",
}
@Proceedings{Aityan:1995:PFI,
editor = "S. K. Aityan and L. T. Grujic and R. J. Hathaway and
G. S. Ladde and N. Medhin and M. Sambandham",
booktitle = "{Proceedings of the First International Conference on
Neural, Parallel and Scientific Computations held at
Morehouse College, Atlanta, USA, May 28--31, 1995}",
title = "{Proceedings of the First International Conference on
Neural, Parallel and Scientific Computations held at
Morehouse College, Atlanta, USA, May 28--31, 1995}",
publisher = "Dynamic Publishers",
address = "Atlanta, GA, USA",
pages = "xi + 506",
year = "1995",
ISBN = "0-9640398-9-3 (hardback), 0-9640398-8-5 (paperback)",
ISBN-13 = "978-0-9640398-9-6 (hardback), 978-0-9640398-8-9
(paperback)",
LCCN = "QA76.87 .I58 1995",
bibdate = "Wed Apr 16 13:17:34 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Proceedings of Neural Parallel and Scientific
Computations 1995",
acknowledgement = ack-nhfb,
}
@Proceedings{Alnuweiri:1995:PHF,
editor = "Hussein M. Alnuweiri and Mounir Hamdi",
booktitle = "{Proceedings of HiNet '95: first international
workshop on high-speed network computing, April 25,
1995, Santa Barbara, California}",
title = "{Proceedings of HiNet '95: first international
workshop on high-speed network computing, April 25,
1995, Santa Barbara, California}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "vii + 119",
year = "1995",
ISBN = "0-8186-7124-6",
ISBN-13 = "978-0-8186-7124-1",
LCCN = "TK5105.5 .H56 1995",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
sponsor = "IEEE.",
}
@Proceedings{Anonymous:1995:CCS,
editor = "Anonymous",
booktitle = "{3rd CLIPS conference --- September 1994, Houston,
TX}",
title = "{3rd CLIPS conference --- September 1994, Houston,
TX}",
publisher = pub-NASA,
address = pub-NASA:adr,
pages = "????",
year = "1995",
ISBN = "????",
ISBN-13 = "????",
LCCN = "????",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "NASA Publications N N95-19625-647, N95-19747-768",
acknowledgement = ack-nhfb,
sponsor = "United States. National Aeronautics and Space
Administration.",
}
@Proceedings{Anonymous:1995:RSS,
editor = "Anonymous",
booktitle = "{Reservoir simulation: 13th Symposium --- February
1995, San Antonio, TX}",
title = "{Reservoir simulation: 13th Symposium --- February
1995, San Antonio, TX}",
publisher = pub-SPE,
address = pub-SPE:adr,
pages = "????",
year = "1995",
ISBN = "????",
ISBN-13 = "????",
LCCN = "????",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Papers --- Society of Petroleum Engineers of AIME",
acknowledgement = ack-nhfb,
sponsor = "American Institute of Mining, Metallurgical, and
Petroleum Engineers; Society of Petroleum Engineers.",
}
@Proceedings{ANS:1995:MCR,
editor = "{ANS}",
booktitle = "{Mathematics and computations, reactor physics, and
environmental analyses: International conference ---
April 1995, Portland, OR}",
title = "{Mathematics and computations, reactor physics, and
environmental analyses: International conference ---
April 1995, Portland, OR}",
publisher = "American Nuclear Society",
address = "La Grange Park, IL, USA",
pages = "xvi + 1597",
year = "1995",
ISBN = "0-89448-198-3",
ISBN-13 = "978-0-89448-198-7",
LCCN = "TK9006.M37 1995",
bibdate = "Wed Aug 14 09:02:28 MDT 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Two volumes.",
acknowledgement = ack-nhfb,
sponsor = "American Nuclear Society; Mathematics and Computation
Division.",
xxeditor-1 = "A. Qaddouri and R. Roy and B. Goulard",
xxeditor-2 = "Z. Stankovski",
}
@Proceedings{Arabnia:1995:TRA,
editor = "Hamid Arabnia",
booktitle = "{Transputer research and applications 7: American
Transputer Users Group, October 23--25, 1994, Atlanta,
GA (NATUG-7)}",
title = "{Transputer research and applications 7: American
Transputer Users Group, October 23--25, 1994, Atlanta,
GA (NATUG-7)}",
volume = "42",
publisher = pub-IOS,
address = pub-IOS:adr,
pages = "ix + 349",
year = "1995",
ISBN = "90-5199-187-8 (IOS Press), 4-274-90017-7 (Ohmsha)",
ISBN-13 = "978-90-5199-187-1 (IOS Press), 978-4-274-90017-4
(Ohmsha)",
ISSN = "0925-4986",
LCCN = "????",
bibdate = "Mon Jan 15 18:41:48 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Transputer and occam engineering series",
acknowledgement = ack-nhfb,
}
@Proceedings{Bailey:1995:PSS,
editor = "D. H. Bailey and P. E. Bjorstad and J. R. Gilbert and
M. V. Mascagni and R. S. Schreiber and H. D. Simon and
V. J. Torczon and L. T. Watson",
booktitle = "{Proceedings of the Seventh SIAM Conference on
Parallel Processing for Scientific Computing (San
Francisco, CA, USA)}",
title = "{Proceedings of the Seventh SIAM Conference on
Parallel Processing for Scientific Computing (San
Francisco, CA, USA)}",
publisher = pub-SIAM,
address = pub-SIAM:adr,
pages = "xviii + 875",
year = "1995",
ISBN = "0-89871-344-7",
ISBN-13 = "978-0-89871-344-2",
LCCN = "QA76.58.S55 1995",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
confdate = "15--17 Feb. 1995",
conflocation = "San Francisco, CA, USA; 15-17 Feb. 1995",
conftitle = "Proceedings of the Seventh SIAM Conference on Parallel
Processing for Scientific Computing",
corpsource = "Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
USA",
sponsor = "Society for Industrial and Applied Mathematics.",
treatment = "P Practical",
}
@Proceedings{Bernardi:1995:CCE,
editor = "Francesco Bernardi and Jean-Louis Rivail",
booktitle = "{Computational chemistry: 1st European conference on
computational chemistry (May 1994, Nancy, France)}",
title = "{Computational chemistry: 1st European conference on
computational chemistry (May 1994, Nancy, France)}",
number = "330",
publisher = pub-AIP,
address = pub-AIP:adr,
pages = "various",
year = "1995",
ISBN = "1-56396-457-0",
ISBN-13 = "978-1-56396-457-2",
ISSN = "0094-243X (print), 1551-7616 (electronic), 1935-0465",
LCCN = "QD39.3.E46 E15 1995",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "AIP Conference Proceedings",
acknowledgement = ack-nhfb,
sponsor = "Federation of European Chemical Societies.",
}
@Proceedings{Bilger:1995:AFM,
editor = "R. W. Bilger",
booktitle = "{12th Australasian fluid mechanics conference: ---
December 1995, Sydney, Australia}",
title = "{12th Australasian fluid mechanics conference: ---
December 1995, Sydney, Australia}",
publisher = "University of Sydney",
address = "Sydney, NSW, Australia",
pages = "????",
year = "1995",
ISBN = "0-86934-034-4",
ISBN-13 = "978-0-86934-034-9",
LCCN = "????",
bibdate = "Wed Aug 14 09:02:28 MDT 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Australasian Fluid Mechanics Conference 1995; EDIT
12//V2",
acknowledgement = ack-nhfb,
sponsor = "University of Sydney.",
}
@Proceedings{Breitenecker:1995:ESC,
editor = "Felix Breitenecker and Irmgard Husinsky",
booktitle = "{EUROSIM '95: simulation congress: proceedings of the
EUROSIM Conference, EUROSIM '95, Vienna, Austria,
11--15 September 1995}",
title = "{EUROSIM '95: simulation congress: proceedings of the
EUROSIM Conference, EUROSIM '95, Vienna, Austria,
11--15 September 1995}",
publisher = pub-ELS,
address = pub-ELS:adr,
pages = "xxii + 1356",
year = "1995",
ISBN = "0-444-82241-0",
ISBN-13 = "978-0-444-82241-3",
LCCN = "QA76.9.C65E966 1995",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
pubcountry = "Netherlands",
sponsor = "Federation of the European Simulation Societies.",
}
@Proceedings{Cantoni:1995:CCA,
editor = "Virginio Cantoni and L. Lombardi and M. Mosconi and M.
Savini and A. Setti",
booktitle = "{CAMP '95, computer architectures for machine
perception: proceedings, September 18--20, 1995, Como,
Italy}",
title = "{CAMP '95, computer architectures for machine
perception: proceedings, September 18--20, 1995, Como,
Italy}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "x + 461",
year = "1995",
ISBN = "0-8186-7134-3",
ISBN-13 = "978-0-8186-7134-0",
LCCN = "QA76.9.A73W675 1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog no. 95TB8093.",
acknowledgement = ack-nhfb,
confsponsor = "Pavia Univ. --- Dipt. Inf. Sistemistica Centro di
Cultura Sci. `A. Volta'; IEEE Comput. Soc. Tech.
Committee on Comput. Archit.; IEEE Comput. Soc. Tech.
Committee on PAMI; ACM SIGART/SIGARCH; Int. Assoc.
Pattern Recognition",
}
@Proceedings{Cook:1995:TAS,
editor = "B. M. Cook and M. R. Jane and P. Nixon and P. M.
Welch",
booktitle = "{Transputer Applications and Systems '95. Proceedings
of the 1995 World Transputer Congress, 4--6 September
1995, Harrogate, North Yorkshire, UK}",
title = "{Transputer Applications and Systems '95. Proceedings
of the 1995 World Transputer Congress, 4--6 September
1995, Harrogate, North Yorkshire, UK}",
publisher = pub-IOS,
address = pub-IOS:adr,
pages = "614",
year = "1995",
ISBN = "90-5199-235-1 (IOS Press), 4-274-90062-2 (Ohmsha)",
ISBN-13 = "978-90-5199-235-9 (IOS Press), 978-4-274-90062-4
(Ohmsha)",
LCCN = "????",
bibdate = "Wed Apr 16 12:07:36 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Proceedings{Dongarra:1995:HPC,
editor = "J. J. Dongarra and others",
booktitle = "{High performance computing: technology, methods, and
applications (Advanced workshop, June 1994, Cetraro,
Italy)}",
title = "{High performance computing: technology, methods, and
applications (Advanced workshop, June 1994, Cetraro,
Italy)}",
volume = "10",
publisher = pub-ELS,
address = pub-ELS:adr,
pages = "viii + 427",
year = "1995",
ISBN = "0-444-82163-5",
ISBN-13 = "978-0-444-82163-8",
ISSN = "0927-5452",
LCCN = "QA76.88.H55 1995",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Advances in Parallel Computing",
acknowledgement = ack-nhfb,
}
@Proceedings{El-Rewini:1995:PTE,
editor = "H. El-Rewini and B. D. Shriver",
booktitle = "{Proceedings of the Twenty-Eighth Hawaii International
Conference on System Sciences}",
title = "{Proceedings of the Twenty-Eighth Hawaii International
Conference on System Sciences}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "x + 361 (vol. 1), xv + 762 (vol. 2), xv + 600 (vol.
3), xx + 1042 (vol. 4), x + 362 (vol. 5)",
year = "1995",
ISBN = "0-8186-6935-7",
ISBN-13 = "978-0-8186-6935-4",
LCCN = "????",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
confdate = "3--6 Jan. 1995",
conflocation = "Wailea, HI, USA",
confsponsor = "Univ. Hawaii; Univ. Hawaii Coll. Bus. Admin.; IEEE
Comput. Soc.; ACM; PRISM",
}
@Proceedings{Ferenczi:1995:PAH,
editor = "Szabolcs Ferenczi and Peter Kacsuk",
booktitle = "{Proceedings of the 2nd Austrian-Hungarian Workshop on
Transputer Applications: September 29--October 1, 1994,
Budapest, Hungary}",
title = "{Proceedings of the 2nd Austrian-Hungarian Workshop on
Transputer Applications: September 29--October 1, 1994,
Budapest, Hungary}",
publisher = "Hungarian Academy of Sciences, Central Research
Institute for Physics",
address = "Budapest, Hungary",
pages = "vii + 282",
year = "1995",
ISBN = "????",
ISBN-13 = "????",
LCCN = "????",
bibdate = "Wed Apr 16 13:32:12 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Technical report KFKI-1995-2/M,N.",
acknowledgement = ack-nhfb,
}
@Proceedings{Ferreira:1995:PAI,
editor = "Afonso Ferreira and Jose Rolim",
booktitle = "{Parallel algorithms for irregularly structured
problems: second international workshop, IRREGULAR 95,
Lyon, France, September 4--6, 1995: proceedings}",
title = "{Parallel algorithms for irregularly structured
problems: second international workshop, IRREGULAR 95,
Lyon, France, September 4--6, 1995: proceedings}",
publisher = pub-SV,
address = pub-SV:adr,
pages = "x + 409",
year = "1995",
ISBN = "3-540-60321-2",
ISBN-13 = "978-3-540-60321-4",
LCCN = "QA76.642.I59 1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
confsponsor = "IFIP",
pubcountry = "Germany",
}
@Proceedings{Fritzson:1995:PPA,
editor = "Peter Fritzson and Leif Finmo",
booktitle = "{Parallel programming and applications: proceedings of
the Workshop on Parallel Programming and Computation
(ZEUS '95) and the 4th Nordic Transputer Conference
(NTUG '95): Link{\"o}ping, Sweden}",
title = "{Parallel programming and applications: proceedings of
the Workshop on Parallel Programming and Computation
(ZEUS '95) and the 4th Nordic Transputer Conference
(NTUG '95): Link{\"o}ping, Sweden}",
publisher = pub-IOS,
address = pub-IOS:adr,
pages = "ix + 435",
year = "1995",
ISBN = "90-5199-229-7 (IOS Press), 4-274-90056-8 (Ohmsha)",
ISBN-13 = "978-90-5199-229-8 (IOS Press), 978-4-274-90056-3
(Ohmsha)",
LCCN = "????",
bibdate = "Wed Apr 16 13:23:58 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Proceedings{Gates:1995:PFI,
editor = "W. Lawrence Gates",
booktitle = "{Proceedings of the First International AMIP
Scientific Conference: Monterey, California, USA,
15--19 May 1995}",
title = "{Proceedings of the First International AMIP
Scientific Conference: Monterey, California, USA,
15--19 May 1995}",
number = "732",
publisher = "World Meteorological Organization",
address = "Geneva, Switzerland",
pages = "viii + 532",
year = "1995",
ISBN = "????",
ISBN-13 = "????",
LCCN = "SIO 1 WO326 v.92",
bibdate = "Wed Aug 14 09:02:28 MDT 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "World Meteorological Organization --- Publications ---
WMO TD 1995",
acknowledgement = ack-nhfb,
sponsor = "Atmospheric Model Intercomparison Project.",
}
@Proceedings{Gray:1995:PCT,
editor = "J. P. Gray and F. Naghdy",
booktitle = "{Parallel Computing: Technology and Practice. PCAT-94.
Proceedings of the 7th Australian Transputer and Occam
User Group Conference: Wollongong, NSW, Australia,
8--9 November 1994}",
title = "{Parallel Computing: Technology and Practice. PCAT-94.
Proceedings of the 7th Australian Transputer and Occam
User Group Conference: Wollongong, NSW, Australia,
8--9 November 1994}",
publisher = pub-IOS,
address = pub-IOS:adr,
pages = "vii + 300",
year = "1995",
ISBN = "????",
ISBN-13 = "????",
LCCN = "????",
bibdate = "Wed Apr 16 12:10:49 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Proceedings{Grinstein:1995:VDE,
editor = "Georges G. Grinstein and Robert F. Erbacher",
booktitle = "{Visual data exploration and analysis II: 8--10
February 1995, San Jose, California}",
title = "{Visual data exploration and analysis II: 8--10
February 1995, San Jose, California}",
volume = "2410",
publisher = pub-SPIE,
address = pub-SPIE:adr,
pages = "viii + 482",
year = "1995",
CODEN = "PSISDG",
ISBN = "0-8194-1757-2",
ISBN-13 = "978-0-8194-1757-2",
ISSN = "0277-786X (print), 1996-756X (electronic)",
LCCN = "TS510.S63 v.2410",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = j-PROC-SPIE,
acknowledgement = ack-nhfb,
confsponsor = "SPIE",
}
@Proceedings{Hamza:1995:PII,
editor = "M. H. Hamza",
booktitle = "{Proceedings of the IASTED International Conference.
Modelling and Simulation: Pittsburgh, PA, USA, 27--29
April 1995}",
title = "{Proceedings of the IASTED International Conference.
Modelling and Simulation: Pittsburgh, PA, USA, 27--29
April 1995}",
publisher = "IASTED-Acta Press",
address = "Anaheim, CA, USA",
pages = "598",
year = "1995",
ISBN = "0-88986-218-4",
ISBN-13 = "978-0-88986-218-0",
LCCN = "QA76.9.C65 I295 1995",
bibdate = "Fri Feb 01 06:58:29 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Proceedings{Haridi:1995:EPP,
editor = "Seif Haridi and Khayri Ali and Peter Magnusson",
booktitle = "{EURO-PAR '95 parallel processing: First International
EURO PAR Conference, Stockholm, Sweden, August 29--31,
1995: proceedings}",
title = "{EURO-PAR '95 parallel processing: First International
EURO PAR Conference, Stockholm, Sweden, August 29--31,
1995: proceedings}",
number = "966",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xv + 730",
year = "1995",
ISBN = "3-540-60247-X",
ISBN-13 = "978-3-540-60247-7",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.58.I553 1995",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
acknowledgement = ack-nhfb,
corpsource = "Centro Svizzero de Calcolo Sci., Eidgenossische Tech.
Hochschule, Manno, Switzerland",
pubcountry = "Germany",
sponsor = "Swedish Institute of Computer Science.",
treatment = "P Practical",
}
@Proceedings{Hassanzadeh:1995:MMG,
editor = "Siamak Hassanzadeh",
booktitle = "{Mathematical methods in geophysical imaging III:
12--13 July 1995, San Diego, California}",
title = "{Mathematical methods in geophysical imaging III:
12--13 July 1995, San Diego, California}",
volume = "2571",
publisher = pub-SPIE,
address = pub-SPIE:adr,
pages = "vii + 240",
year = "1995",
CODEN = "PSISDG",
ISBN = "0-8194-1930-3",
ISBN-13 = "978-0-8194-1930-9",
ISSN = "0277-786X (print), 1996-756X (electronic)",
LCCN = "TS510.S63 v.2571",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = j-PROC-SPIE,
acknowledgement = ack-nhfb,
confsponsor = "SPIE",
}
@Proceedings{Hertzberger:1995:HPM,
editor = "Bob Hertzberger and Giuseppe Serazzi",
booktitle = "{High-Per\-for\-mance computing and networking:
International Conference and Exhibition, Milan, Italy,
May 3--5, 1995: proceedings}",
title = "{High-Per\-for\-mance computing and networking:
International Conference and Exhibition, Milan, Italy,
May 3--5, 1995: proceedings}",
number = "919",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xxiv + 957",
year = "1995",
ISBN = "3-540-59393-4",
ISBN-13 = "978-3-540-59393-5",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.88 .I57 1995",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
acknowledgement = ack-nhfb,
sponsor = "High Performance Computing and Networking
Foundation.",
}
%%% Parent proceedings record: Sixth ECMWF Workshop on the Use of Parallel
%%% Processors in Meteorology (held November 1994, published 1995).
@Proceedings{Hoffmann:1995:CAP,
  editor          = {Geerd-R. Hoffmann and Norbert Kreitz},
  booktitle       = {{Coming of age: proceedings of the Sixth ECMWF
                     Workshop on the Use of Parallel Processors in
                     Meteorology, Reading, UK, November 21--25, 1994}},
  title           = {{Coming of age: proceedings of the Sixth ECMWF
                     Workshop on the Use of Parallel Processors in
                     Meteorology, Reading, UK, November 21--25, 1994}},
  publisher       = pub-WORLD-SCI,
  address         = pub-WORLD-SCI:adr,
  pages           = {x + 568},
  year            = {1995},
  isbn            = {981-02-2211-4},
  isbn-13         = {978-981-02-2211-6},
  lccn            = {QC866.E26 1994},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  pubcountry      = {Singapore},
}
%%% Parent proceedings record: 14th Phoenix Conference on Computers and
%%% Communications, Scottsdale, March 1995. Corporate editor is brace-protected.
@Proceedings{IEEE:1995:CPI,
  editor          = {{IEEE}},
  booktitle       = {{Conference proceedings of the 1995 IEEE Fourteenth
                     Annual International Phoenix Conference on Computers
                     and Communications: Scottsdale, Arizona, USA, March
                     28--31, 1995}},
  title           = {{Conference proceedings of the 1995 IEEE Fourteenth
                     Annual International Phoenix Conference on Computers
                     and Communications: Scottsdale, Arizona, USA, March
                     28--31, 1995}},
  volume          = {14},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {xvii + 742},
  year            = {1995},
  isbn            = {0-7803-2493-5, 0-7803-2492-7, 0-7803-2494-3},
  isbn-13         = {978-0-7803-2493-0, 978-0-7803-2492-3,
                     978-0-7803-2494-7},
  lccn            = {TK7885.A1 I567 1995},
  bibdate         = {Thu Feb 29 17:59:11 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {IEEE catalog no. 95CH35751.},
  acknowledgement = ack-nhfb,
  sponsor         = {IEEE.},
}
%%% Parent proceedings record: 25th International Symposium on Fault-Tolerant
%%% Computing, Pasadena, June 1995. confsponsor is a non-standard field that
%%% lists the sponsoring bodies, separated by semicolons.
@Proceedings{IEEE:1995:DPT,
editor = "{IEEE}",
booktitle = "{Digest of papers / the Twenty-fifth International
Symposium on Fault-Tolerant Computing, June 27--30,
1995, Pasadena, California}",
title = "{Digest of papers / the Twenty-fifth International
Symposium on Fault-Tolerant Computing, June 27--30,
1995, Pasadena, California}",
publisher = pub-IEEE,
address = pub-IEEE:adr,
pages = "xxiii + 547",
year = "1995",
ISBN = "0-8186-7079-7",
ISBN-13 = "978-0-8186-7079-4",
LCCN = "QA 76.9 F38 I57 1995",
bibdate = "Sun Dec 22 10:19:23 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "IEEE catalog no. 95CB35823.",
acknowledgement = ack-nhfb,
confsponsor = "IEEE Comput. Soc. Tech. Committee on Fault-Tolerant
Comput.; LAAS-CNRS, France; Univ. Illinois at
Urbana-Champaign; Univ. California at Los Angeles; Jet
Propulsion Lab.; IFIP WG 10.4",
}
%%% Parent proceedings record: 1995 IEEE Systems, Man, and Cybernetics
%%% conference, Vancouver, October 1995 (five volumes, per the note field).
@Proceedings{IEEE:1995:IIC,
  editor          = {{IEEE}},
  booktitle       = {{1995 IEEE International Conference on Systems, Man,
                     and Cybernetics: intelligent systems for the 21st
                     century: Vancouver, British Columbia, Canada, October
                     22--25, 1995}},
  title           = {{1995 IEEE International Conference on Systems, Man,
                     and Cybernetics: intelligent systems for the 21st
                     century: Vancouver, British Columbia, Canada, October
                     22--25, 1995}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {4711},
  year            = {1995},
  isbn            = {0-7803-2559-1},
  isbn-13         = {978-0-7803-2559-3},
  lccn            = {TA168.I19 1995},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {Five volumes. IEEE catalog no. 95CH3576-7.},
  acknowledgement = ack-nhfb,
}
%%% Parent proceedings record: IEEE Pacific Rim Conference on Communications,
%%% Computers, and Signal Processing, Victoria, May 1995.
@Proceedings{IEEE:1995:IPR,
  editor          = {{IEEE}},
  booktitle       = {{IEEE Pacific Rim Conference on Communications,
                     Computers, and Signal Processing: proceedings / May
                     17--19, 1995, Victoria Conference Centre, Victoria,
                     British Columbia, Canada}},
  title           = {{IEEE Pacific Rim Conference on Communications,
                     Computers, and Signal Processing: proceedings / May
                     17--19, 1995, Victoria Conference Centre, Victoria,
                     British Columbia, Canada}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {xiv + 638},
  year            = {1995},
  isbn            = {0-7803-2553-2},
  isbn-13         = {978-0-7803-2553-1},
  lccn            = {TK 5101 A1 I34 1995},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {IEEE catalog no. 95CH35765.},
  acknowledgement = ack-nhfb,
  confsponsor     = {IEEE Victoria Sect.; IEEE Canada; Dept. Comput. Sci.
                     and the Fac. Eng., Univ. Victoria},
}
%%% Parent proceedings record: Ideas in Science and Electronics Exposition and
%%% Symposium, Albuquerque, May 1995. "????" marks identifiers not yet known.
@Proceedings{IEEE:1995:ISE,
  editor          = {{IEEE}},
  booktitle       = {{Ideas in Science and Electronics Exposition and
                     Symposium. Proceedings: Albuquerque, NM, USA, 9--11 May
                     1995}},
  title           = {{Ideas in Science and Electronics Exposition and
                     Symposium. Proceedings: Albuquerque, NM, USA, 9--11 May
                     1995}},
  volume          = {17},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {64},
  year            = {1995},
  isbn            = {????},
  isbn-13         = {????},
  lccn            = {????},
  bibdate         = {Thu Feb 29 17:59:11 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = {Annual Ideas in Science and Electronics Exposition and
                     Symposium Conference},
  acknowledgement = ack-nhfb,
  sponsor         = {IEEE.},
}
%%% Parent proceedings record: Euromicro Workshop on Parallel and Distributed
%%% Processing, San Remo, January 1995. The xxeditor1/xxeditor2 fields are
%%% non-standard and hold alternative editor lists alongside the corporate editor.
@Proceedings{IEEE:1995:PEW,
  editor          = {{IEEE}},
  booktitle       = {{Proceedings: Euromicro Workshop on Parallel and
                     Distributed Processing, San Remo, Italy, January
                     25--27, 1995}},
  title           = {{Proceedings: Euromicro Workshop on Parallel and
                     Distributed Processing, San Remo, Italy, January
                     25--27, 1995}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {xiii + 582},
  year            = {1995},
  isbn            = {0-8186-7031-2, 0-8186-7032-0},
  isbn-13         = {978-0-8186-7031-2, 978-0-8186-7032-9},
  lccn            = {QA76.58 .E97 1995},
  bibdate         = {Wed Aug 14 09:02:28 MDT 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = {Euromicro Workshop on Parallel and Distributed
                     Processing 1995; 3rd},
  acknowledgement = ack-nhfb,
  xxeditor1       = {I. Martin and J. C. Fabero and F. Tirado and A.
                     Bautista},
  xxeditor2       = {V. Gianuzzi and F. Merani},
}
%%% Parent proceedings record: Fourth IEEE International Symposium on High
%%% Performance Distributed Computing, Washington, DC, August 1995.
@Proceedings{IEEE:1995:PFI,
  editor          = {{IEEE}},
  booktitle       = {{Proceedings of the Fourth IEEE International
                     Symposium on High Performance Distributed Computing,
                     August 2--4, 1995, Washington, DC, USA}},
  title           = {{Proceedings of the Fourth IEEE International
                     Symposium on High Performance Distributed Computing,
                     August 2--4, 1995, Washington, DC, USA}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {xiv + 246},
  year            = {1995},
  isbn            = {0-8186-7088-6},
  isbn-13         = {978-0-8186-7088-6},
  lccn            = {QA76.9.D5 I328 1995},
  bibdate         = {Thu Feb 29 17:59:11 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {IEEE catalog no. 95TB8075.},
  acknowledgement = ack-nhfb,
  confsponsor     = {IEEE Tech. Committee on Distrib. Process.; Northeast
                     Parallel Architectures Centre (NPAC) at Syracuse Univ.;
                     ACM SIGCOMM; Rome Lab},
  sponsor         = {IEEE. Computer Society. Technical Committee on
                     Distributed Processing Northeast Parallel Architectures
                     Center.},
}
%%% Parent proceedings record: 15th International Conference on Distributed
%%% Computing Systems, Vancouver, 30 May--2 June 1995.
@Proceedings{IEEE:1995:PIC,
  editor          = {{IEEE}},
  booktitle       = {{Proceedings of the 15th International Conference on
                     Distributed Computing Systems: Vancouver, BC, Canada,
                     30 May--2 June 1995}},
  title           = {{Proceedings of the 15th International Conference on
                     Distributed Computing Systems: Vancouver, BC, Canada,
                     30 May--2 June 1995}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {xx + 537},
  year            = {1995},
  isbn            = {0-8186-7025-8},
  isbn-13         = {978-0-8186-7025-1},
  lccn            = {????},
  bibdate         = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {IEEE catalog number 95CH35784.},
  acknowledgement = ack-nhfb,
  corpsource      = {IBM Thomas J. Watson Res. Center, Yorktown Heights,
                     NY, USA},
  sponsororg      = {IEEE Comput. Soc. Tech. Committee on Distributed
                     Process},
  treatment       = {A Application; P Practical},
}
%%% Parent proceedings record: 9th International Parallel Processing
%%% Symposium, Santa Barbara, April 1995.
@Proceedings{IEEE:1995:PIP,
  editor          = {{IEEE}},
  booktitle       = {{Proceedings / 9th International Parallel Processing
                     Symposium, April 25--28, 1995, Santa Barbara,
                     California}},
  title           = {{Proceedings / 9th International Parallel Processing
                     Symposium, April 25--28, 1995, Santa Barbara,
                     California}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {xxiii + 851},
  year            = {1995},
  isbn            = {0-8186-7074-6},
  isbn-13         = {978-0-8186-7074-9},
  lccn            = {QA 76.58 I56 1995},
  bibdate         = {Sun Dec 22 10:20:45 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {IEEE catalog no. 95TH8052.},
  acknowledgement = ack-nhfb,
  confsponsor     = {IEEE Comput. Soc. Tech. Committee on Parallel
                     Process},
}
%%% Parent proceedings record: COMPSAC '95 (19th annual International Computer
%%% Software and Applications Conference), Dallas, August 1995.
@Proceedings{IEEE:1995:PNA,
  editor          = {{IEEE}},
  booktitle       = {{Proceedings: the nineteenth annual International
                     Computer Software and Applications Conference (COMPSAC
                     '95): August 9--11, 1995, Dallas, Texas}},
  title           = {{Proceedings: the nineteenth annual International
                     Computer Software and Applications Conference (COMPSAC
                     '95): August 9--11, 1995, Dallas, Texas}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {xviii + 431},
  year            = {1995},
  isbn            = {0-8186-7119-X},
  isbn-13         = {978-0-8186-7119-7},
  lccn            = {QA 76.6 C6295 1995},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {IEEE catalog no. 95CB35838.},
  acknowledgement = ack-nhfb,
  confsponsor     = {IEEE Comput. Soc},
}
%%% Parent proceedings record: Seventh IEEE Symposium on Parallel and
%%% Distributed Processing, San Antonio, October 1995. The conf*, corpsource,
%%% sponsororg, and treatment fields are non-standard auxiliary metadata.
@Proceedings{IEEE:1995:PSI,
  editor          = {{IEEE}},
  booktitle       = {{Proceedings / Seventh IEEE Symposium on Parallel and
                     Distributed Processing, October 25--28, 1995, San
                     Antonio, Texas}},
  title           = {{Proceedings / Seventh IEEE Symposium on Parallel and
                     Distributed Processing, October 25--28, 1995, San
                     Antonio, Texas}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {xvii + 724},
  year            = {1995},
  isbn            = {0-8186-7195-5},
  isbn-13         = {978-0-8186-7195-1},
  lccn            = {QA 76.58 I42 1995},
  bibdate         = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {IEEE catalog number 95TB8131.},
  acknowledgement = ack-nhfb,
  conflocation    = {San Antonio, TX, USA; 25-28 Oct. 1995},
  confsponsor     = {IEEE Comput Soc. Tech. Committee on Comput.
                     Architecture; IEEE Comput. Soc. Tech. Committee on
                     Distributed Process.; IEEE Comput. Soc. Dallas
                     Chapter},
  conftitle       = {Proceedings of Seventh IEEE Symposium on Parallel and
                     Distributed Processing},
  corpsource      = {Div. of Math. and Comput. Sci., Argonne Nat. Lab., IL,
                     USA},
  sponsororg      = {IEEE Comput Soc. Tech. Committee on Comput.
                     Architecture; IEEE Comput. Soc. Tech. Committee on
                     Distributed Process.; IEEE Comput. Soc. Dallas
                     Chapter},
  treatment       = {P Practical},
}
%%% Parent proceedings record: 1994 Scalable Parallel Libraries Conference,
%%% Mississippi State University (held October 1994, published 1995).
@Proceedings{IEEE:1995:PSP,
  editor          = {{IEEE}},
  booktitle       = {{Proceedings of the 1994 Scalable Parallel Libraries
                     Conference: October 12--14, 1994, Mississippi State
                     University, Mississippi}},
  title           = {{Proceedings of the 1994 Scalable Parallel Libraries
                     Conference: October 12--14, 1994, Mississippi State
                     University, Mississippi}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {vii + 191},
  year            = {1995},
  isbn            = {0-8186-6895-4},
  isbn-13         = {978-0-8186-6895-1},
  lccn            = {QA76.58 .S34 1994},
  bibdate         = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  conflocation    = {Mississippi State, MS, USA; 12-14 Oct. 1994},
  confsponsor     = {Mississippi State Univ.; NSF},
  conftitle       = {Proceedings Scalable Parallel Libraries Conference},
  corpsource      = {Sch. of Comput. Sci., Carnegie Mellon Univ.,
                     Pittsburgh, PA, USA},
  sponsororg      = {Mississippi State Univ.; NSF},
  treatment       = {P Practical},
}
%%% Parent proceedings record: IFIP WG 2.5 Kyoto Workshop, October 1995.
%%% Publisher, address, pages, and identifiers are still unknown ("????");
%%% the url field carries two semicolon-separated locations.
@Proceedings{IFIP:1995:KWC,
  editor          = {{IFIP Working Group 2.5}},
  booktitle       = {{Kyoto Workshop 1995: Current Directions in Numerical
                     Software and High Performance Computing, 19--20 October
                     1995, Kyoto, Japan}},
  title           = {{Kyoto Workshop 1995: Current Directions in Numerical
                     Software and High Performance Computing, 19--20 October
                     1995, Kyoto, Japan}},
  publisher       = {????},
  address         = {????},
  pages           = {????},
  year            = {1995},
  isbn            = {????},
  isbn-13         = {????},
  lccn            = {????},
  bibdate         = {Wed Jan 24 06:55:27 2001},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  url             = {http://www.nsc.liu.se/~boein/ifip/kyoto/kyoto.html#reid;
                     http://www.nsc.liu.se/~boein/ifip/kyoto/workshop-info/proceedings/},
  acknowledgement = ack-nhfb,
}
%%% Parent proceedings record: ISSAC '95 (symbolic and algebraic computation),
%%% Montreal, July 1995.
@Proceedings{Levelt:1995:IIS,
  editor          = {A. H. M. Levelt},
  booktitle       = {{ISSAC '95: International symposium on symbolic and
                     algebraic computation --- July 10--12, 1995,
                     Montr{\'e}al, Canada}},
  title           = {{ISSAC '95: International symposium on symbolic and
                     algebraic computation --- July 10--12, 1995,
                     Montr{\'e}al, Canada}},
  publisher       = pub-ACM,
  address         = pub-ACM:adr,
  pages           = {xviii + 314},
  year            = {1995},
  isbn            = {0-89791-699-9},
  isbn-13         = {978-0-89791-699-8},
  lccn            = {QA 76.95 I59 1995},
  bibdate         = {Thu Feb 29 17:59:11 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = {ISSAC --- Proceedings},
  acknowledgement = ack-nhfb,
}
%%% Parent proceedings record: PaCT-95 (Parallel Computing Technologies),
%%% St. Petersburg, September 1995 (number 964 in the ser-LNCS series).
%%% NOTE(review): the conference dates are recorded as September 12--15;
%%% verify against the published title page.
@Proceedings{Malyshkin:1995:PCT,
editor = "Victor Malyshkin",
booktitle = "{Parallel computing technologies: third international
conference, PaCT-95, St. Petersburg, Russia, September
12--15, 1995: proceedings}",
title = "{Parallel computing technologies: third international
conference, PaCT-95, St. Petersburg, Russia, September
12--15, 1995: proceedings}",
number = "964",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xii + 495",
year = "1995",
ISBN = "3-540-60222-4",
ISBN-13 = "978-3-540-60222-4",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.58.I547 1995",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
acknowledgement = ack-nhfb,
sponsor = "Russian Academy of Sciences. Computing Center
Electrotechnical University of St. Petersburg.",
}
%%% Parent proceedings record: VRML '95 symposium, San Diego, December 1995.
%%% keywords is a semicolon-separated list of subject headings.
@Proceedings{Nadeau:1995:SVR,
  editor          = {David R. Nadeau and John L. Moreland},
  booktitle       = {{1995 Symposium on the Virtual Reality Modeling
                     Language, VRML '95, San Diego, California, December
                     14--15, 1995}},
  title           = {{1995 Symposium on the Virtual Reality Modeling
                     Language, VRML '95, San Diego, California, December
                     14--15, 1995}},
  publisher       = pub-ACM,
  address         = pub-ACM:adr,
  pages           = {139},
  year            = {1995},
  isbn            = {0-89791-818-5},
  isbn-13         = {978-0-89791-818-3},
  lccn            = {QA76.76.H94 S95 1995},
  bibdate         = {Fri Sep 11 08:29:11 MDT 1998},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {ACM order number 434953.},
  acknowledgement = ack-nhfb,
  confsponsor     = {San Diego Supercomput. Center; ACM},
  keywords        = {SGML; Virtual reality --- Congresses; VRML (Computer
                     program language) --- Congresses},
}
%%% Parent proceedings record: ICAPP 95 (Algorithms and Architectures for
%%% Parallel Processing), Brisbane, April 1995 (two volumes, per the note).
@Proceedings{Narashimhan:1995:IIF,
  editor          = {V. L. Narashimhan},
  booktitle       = {{ICAPP 95. IEEE First International Conference on
                     Algorithms and Architectures for Parallel Processing,
                     Brisbane, Australia, 19--21 April, 1995}},
  title           = {{ICAPP 95. IEEE First International Conference on
                     Algorithms and Architectures for Parallel Processing,
                     Brisbane, Australia, 19--21 April, 1995}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {xvii + 961},
  year            = {1995},
  isbn            = {0-7803-2018-2 (paperback), 0-7803-2019-0
                     (microfiche)},
  isbn-13         = {978-0-7803-2018-5 (paperback), 978-0-7803-2019-2
                     (microfiche)},
  lccn            = {QA76.6.I15 1995},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {Two volumes. IEEE catalog no. 95TH0682-5.},
  acknowledgement = ack-nhfb,
  confsponsor     = {Parallel Algorithms, Archit. and Software Eng. Res.
                     Lab.; IEEE; IEEE Comput. Soc.; ACM; Euromicro; IBM;
                     Instn. Eng. Australia; Inst. Radio and Electron. Eng.
                     Soc.; Australian Comput. Soc},
}
%%% Parent proceedings record: 6th International Conference on Computing in
%%% Civil and Building Engineering, Berlin, July 1995 (two volumes, per the
%%% note). The sponsor value ends in the abbreviation "e.V.", whose own period
%%% also serves as the closing period.
@Proceedings{Pahl:1995:CCB,
editor = "Peter Jan Pahl and Heinrich Werner",
booktitle = "{Computing in civil and building engineering: 6th
International conference --- July 1995, Berlin}",
title = "{Computing in civil and building engineering: 6th
International conference --- July 1995, Berlin}",
publisher = "A. A. Balkema",
address = "Brookfield, VT, USA",
pages = "xxiv + 1641",
year = "1995",
ISBN = "90-5410-556-9, 90-5410-557-7",
ISBN-13 = "978-90-5410-556-5, 978-90-5410-557-2",
LCCN = "TA345 .I565 1995 v.1-2",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Two volumes.",
series = "Computing in Civil and Building Engineering 6th",
acknowledgement = ack-nhfb,
sponsor = "Arbeitskreis Bauinformatik
Technologie-Vermittlungs-Agentur Berlin e.V.",
}
%%% Parent proceedings record: 7th International Workshop on Languages and
%%% Compilers for Parallel Computing, Ithaca, August 1994 (published 1995).
%%% The series is given via the shared ser-LNCS macro, and the ISSN matches
%%% the other entries in this file that use that macro.
@Proceedings{Pingali:1995:LCP,
editor = "K. Pingali and U. Banerjee and D. Gelernter and A.
Nicolau and D. Padua",
booktitle = "{Languages and compilers for parallel computing: 7th
International Workshop, Ithaca, NY, USA, August 8--10,
1994: proceedings}",
title = "{Languages and compilers for parallel computing: 7th
International Workshop, Ithaca, NY, USA, August 8--10,
1994: proceedings}",
volume = "892",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xl + 496",
year = "1995",
ISBN = "3-540-58868-X",
ISBN-13 = "978-3-540-58868-9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.58 .W656 1994",
bibdate = "Sun Dec 22 10:20:45 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
acknowledgement = ack-nhfb,
pubcountry = "Germany",
}
%%% Parent proceedings record: First International Workshop on Parallel
%%% Processing (IWPP-94), Bangalore, December 1994 (published 1995).
@Proceedings{Prasanna:1995:FIP,
editor = "Viktor K. Prasanna and V. P. Bhatkar and L. M. Patnaik
and S. K. Tripathi",
booktitle = "{First IWPP parallel processing: proceedings of the
First International Workshop on Parallel Processing
(IWPP-94): December 26--31, 1994, Bangalore, India}",
title = "{First IWPP parallel processing: proceedings of the
First International Workshop on Parallel Processing
(IWPP-94): December 26--31, 1994, Bangalore, India}",
publisher = "Tata McGraw-Hill Pub. Co",
address = "New Delhi; New York",
pages = "xxiii + 736",
year = "1995",
ISBN = "0-07-462332-X",
ISBN-13 = "978-0-07-462332-9",
LCCN = "QA 76.58 I587 1994",
bibdate = "Wed Apr 16 14:07:03 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
%%% Parent proceedings record: Parallel CFD '94 Conference, Kyoto, May 1994
%%% (published 1995).
@Proceedings{Satofuka:1995:PCF,
  editor          = {N. Satofuka and Jacques Periaux and Akin Ecer},
  booktitle       = {{Parallel computational fluid dynamics: new algorithms
                     and applications: proceedings of the Parallel CFD '94
                     Conference, Kyoto, Japan, 16--19 May 1994}},
  title           = {{Parallel computational fluid dynamics: new algorithms
                     and applications: proceedings of the Parallel CFD '94
                     Conference, Kyoto, Japan, 16--19 May 1994}},
  publisher       = pub-ELS,
  address         = pub-ELS:adr,
  pages           = {xi + 457},
  year            = {1995},
  isbn            = {0-444-82317-4},
  isbn-13         = {978-0-444-82317-5},
  lccn            = {QA911 .P35 1994},
  bibdate         = {Wed Apr 16 07:34:31 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  keywords        = {fluid dynamics -- data processing -- congresses;
                     parallel processing (electronic computers) --
                     congresses; supercomputers -- congresses},
}
%%% Parent proceedings record: Astronomical Data Analysis Software and
%%% Systems IV, Baltimore, September 1994 (published 1995). Editors are given
%%% in plain "First Last" form, without catalogue-style parenthetical
%%% expansions, so that each name parses cleanly into first and last parts.
@Proceedings{Shaw:1995:ADA,
editor = "Richard A. Shaw and Harry E. Payne and Jeffrey J. E.
Hayes",
booktitle = "{Astronomical data analysis software and systems IV:
meeting held at Baltimore, Maryland, 25--28 September
1994}",
title = "{Astronomical data analysis software and systems IV:
meeting held at Baltimore, Maryland, 25--28 September
1994}",
volume = "77",
publisher = "Astronomical Society of the Pacific",
address = "San Francisco, CA, USA",
pages = "xxxvi + 533",
year = "1995",
ISBN = "0-937707-96-1",
ISBN-13 = "978-0-937707-96-8",
ISSN = "1080-7926",
LCCN = "QB51.3.E43 A87 1994",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Astronomical Society of the Pacific Conference
Series",
acknowledgement = ack-nhfb,
sponsor = "Astronomical Society of the Pacific.",
}
%%% Parent proceedings record: High Performance Computing Symposium 1995,
%%% part of the 1995 Simulation Multiconference, Phoenix, April 1995.
@Proceedings{Tentner:1995:HPC,
  editor          = {A. Tentner},
  booktitle       = {{High Performance Computing Symposium 1995 `Grand
                     Challenges in Computer Simulation'. Proceedings of the
                     1995 Simulation Multiconference: Phoenix, AZ, USA,
                     9--13 April 1995}},
  title           = {{High Performance Computing Symposium 1995 `Grand
                     Challenges in Computer Simulation'. Proceedings of the
                     1995 Simulation Multiconference: Phoenix, AZ, USA,
                     9--13 April 1995}},
  publisher       = {Society for Computer Simulation},
  address         = {San Diego, CA, USA},
  pages           = {xxiii + 566},
  year            = {1995},
  isbn            = {1-56555-078-1},
  isbn-13         = {978-1-56555-078-0},
  lccn            = {????},
  bibdate         = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
  corpsource      = {Oak Ridge Nat. Lab., TN, USA},
  sponsororg      = {SCS},
  treatment       = {P Practical},
}
%%% Parent proceedings record: 1995 Parallel Rendering Symposium, Atlanta,
%%% October 1995. The softbound ISBN-10 is recorded as published even though
%%% it is flagged as having an invalid checksum.
@Proceedings{Uselton:1995:PRS,
  editor          = {Samuel P. Uselton and Michael Brian Cox and Craig M.
                     Wittenbrink},
  booktitle       = {{1995 Parallel Rendering Symposium (PRS 95): Atlanta,
                     Georgia, October 30--31, 1995}},
  title           = {{1995 Parallel Rendering Symposium (PRS 95): Atlanta,
                     Georgia, October 30--31, 1995}},
  publisher       = pub-ACM,
  address         = pub-ACM:adr,
  pages           = {107},
  year            = {1995},
  isbn            = {0-89791-774-1 (softbound) [invalid checksum],
                     0-7803-3120-6 (microfiche)},
  isbn-13         = {978-0-89791-774-2 (softbound), 978-0-7803-3120-4
                     (microfiche)},
  lccn            = {QA76.58.P3778 1995},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {ACM order number 428957. IEEE Computer Society Press
                     order number 95TB8134.},
  acknowledgement = ack-nhfb,
  confsponsor     = {IEEE Comput. Soc. Techn. Committee on Comput.
                     Graphics; ACM SIGGRAPH},
}
%%% Parent proceedings record: 1995 USENIX Technical Conference, New Orleans,
%%% January 1995.
@Proceedings{USENIX:1995:PUT,
  editor          = {{USENIX}},
  booktitle       = {{Proceedings of the 1995 USENIX Technical Conference,
                     January 16--20, 1995, New Orleans, Louisiana, USA}},
  title           = {{Proceedings of the 1995 USENIX Technical Conference,
                     January 16--20, 1995, New Orleans, Louisiana, USA}},
  publisher       = pub-USENIX,
  address         = pub-USENIX:adr,
  pages           = {325},
  year            = {1995},
  isbn            = {1-880446-67-7},
  isbn-13         = {978-1-880446-67-6},
  lccn            = {QA 76.76 O63 U88 1995},
  bibdate         = {Sun Dec 22 10:19:23 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
%%% Parent proceedings record: 1994 CERN School of Computing, Sopron
%%% (published 1995 as CERN report 95-01, per the note field).
@Proceedings{Vandoni:1995:CSC,
  editor          = {C. E. Vandoni and C. Verkerk},
  booktitle       = {{1994 CERN School of Computing: Sopron, Hungary, 28
                     August--10 September 1994: proceedings}},
  title           = {{1994 CERN School of Computing: Sopron, Hungary, 28
                     August--10 September 1994: proceedings}},
  publisher       = {CERN},
  address         = {Geneva, Switzerland},
  pages           = {ix + 336},
  year            = {1995},
  isbn            = {92-9083-069-7},
  isbn-13         = {978-92-9083-069-6},
  bibdate         = {Sun Dec 22 10:20:45 MST 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {CERN report 95-01.},
  acknowledgement = ack-nhfb,
  pubcountry      = {Switzerland},
}
%%% Parent proceedings record: first annual conference of the Advanced School
%%% for Computing and Imaging (ASCI), Heijen, May 1995. The acronym in the
%%% title agrees with the publisher and series fields.
%%% NOTE(review): confirm the exact title wording against the printed volume.
@Proceedings{VanKatwijk:1995:AAC,
editor = "Jan {Van Katwijk}",
booktitle = "{ASCI '95: 1st Annual conference --- May 1995,
Heijen, The Netherlands}",
title = "{ASCI '95: 1st Annual conference --- May 1995,
Heijen, The Netherlands}",
publisher = "ASCI",
address = "Delft, The Netherlands",
pages = "xi + 450",
year = "1995",
ISBN = "90-90-08344-8",
ISBN-13 = "978-90-90-08344-5",
LCCN = "QA75.5 .A38x 1995",
bibdate = "Thu Feb 29 17:59:11 MST 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = "Proceedings of the Annual Conference --- Advanced
School for Computing and Imaging, 1st",
acknowledgement = ack-nhfb,
sponsor = "Advanced School for Computing and Imaging",
}
%%% Parent proceedings record: GeoComputation 96, Leeds, September 1996.
%%% Publisher, address, pages, and identifiers are still unknown ("????").
@Proceedings{Abrahart:1996:GIC,
  editor          = {R. J. Abrahart},
  booktitle       = {{GeoComputation 96. 1st International Conference on
                     GeoComputation: Leeds, UK, 17--19 September 1996}},
  title           = {{GeoComputation 96. 1st International Conference on
                     GeoComputation: Leeds, UK, 17--19 September 1996}},
  publisher       = {????},
  address         = {????},
  pages           = {????},
  year            = {1996},
  isbn            = {????},
  isbn-13         = {????},
  lccn            = {????},
  bibdate         = {Wed Apr 16 14:19:17 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
%%% Parent proceedings record: 1996 International Conference on
%%% Supercomputing (FCRC '96), Philadelphia, May 1996.
@Proceedings{ACM:1996:FCP,
  editor          = {{ACM}},
  booktitle       = {{FCRC '96: Conference proceedings of the 1996
                     International Conference on Supercomputing:
                     Philadelphia, Pennsylvania, USA, May 25--28, 1996}},
  title           = {{FCRC '96: Conference proceedings of the 1996
                     International Conference on Supercomputing:
                     Philadelphia, Pennsylvania, USA, May 25--28, 1996}},
  publisher       = pub-ACM,
  address         = pub-ACM:adr,
  pages           = {xii + 406},
  year            = {1996},
  isbn            = {0-89791-803-7},
  isbn-13         = {978-0-89791-803-9},
  lccn            = {QA76.5 I61 1996},
  bibdate         = {Wed Mar 18 12:33:29 MST 1998},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {ACM order number 415961.},
  acknowledgement = ack-nhfb,
  keywords        = {Supercomputers --- Congresses.},
}
%%% Parent proceedings record: Supercomputing '96, Pittsburgh, November 1996.
%%% Publisher and address are built by concatenating the ACM and IEEE macros.
@Proceedings{ACM:1996:SCP,
  editor          = {{ACM}},
  booktitle       = {{Supercomputing '96 Conference Proceedings: November
                     17--22, Pittsburgh, PA}},
  title           = {{Supercomputing '96 Conference Proceedings: November
                     17--22, Pittsburgh, PA}},
  publisher       = pub-ACM # " and " # pub-IEEE,
  address         = pub-ACM:adr # " and " # pub-IEEE:adr,
  pages           = {????},
  year            = {1996},
  isbn            = {0-89791-854-1},
  isbn-13         = {978-0-89791-854-1},
  lccn            = {QA 76.88 S8573 1996},
  bibdate         = {Tue May 12 08:55:21 1998},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {ACM Order Number: 415962, IEEE Computer Society Press
                     Order Number: RS00126.},
  url             = {http://www.supercomp.org/sc96/proceedings/},
  acknowledgement = ack-nhfb,
}
%%% Parent proceedings record: VRML '95 symposium (held San Diego, December
%%% 1995; this record carries year 1996).
%%% NOTE(review): this entry has the same ISBN as entry Nadeau:1995:SVR and
%%% may describe the same volume; confirm before merging or removing either.
@Proceedings{ACM:1996:SVR,
editor = "{ACM}",
booktitle = "{1995 Symposium on the Virtual Reality Modeling
Language (VRML '95)}",
title = "{1995 Symposium on the Virtual Reality Modeling
Language (VRML '95)}",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "139",
year = "1996",
ISBN = "0-89791-818-5",
ISBN-13 = "978-0-89791-818-3",
LCCN = "????",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
URL = "http://www.acm.org/pubs/contents/proceedings/graph/217306/",
acknowledgement = ack-nhfb,
conflocation = "San Diego, CA, USA; 14-15 Dec. 1995",
conftitle = "Proceedings of 1995 VRML Workshop",
corpsource = "Visual Comput. Lab., California Univ., San Diego, La
Jolla, CA, USA",
sponsororg = "San Diego Supercomput. Center; ACM",
treatment = "P Practical",
}
%%% Parent proceedings record: EuroPVM '96 (third European PVM conference),
%%% Munich, October 1996 (volume 1156 of the ser-LNCS series).
@Proceedings{Bode:1996:PVM,
  editor          = {Arndt Bode and Jack Dongarra and T. Ludwig and V.
                     Sunderam},
  booktitle       = {{Parallel virtual machine, EuroPVM '96: third European
                     PVM conference, Munich, Germany, October 7--9, 1996:
                     proceedings}},
  title           = {{Parallel virtual machine, EuroPVM '96: third European
                     PVM conference, Munich, Germany, October 7--9, 1996:
                     proceedings}},
  volume          = {1156},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = {xiv + 362},
  year            = {1996},
  isbn            = {3-540-61779-5},
  isbn-13         = {978-3-540-61779-2},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  lccn            = {QA76.58.E975 1996},
  bibdate         = {Sat Apr 19 16:34:54 MDT 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = ser-LNCS,
  acknowledgement = ack-nhfb,
  conflocation    = {Munich, Germany; 7-9 Oct. 1996},
  conftitle       = {Parallel Virtual Machine - EuroPVM '96. Third European
                     PVM Conference. Proceedings},
  corpsource      = {Computations and Commun. Res. Labs., NEC Europe Ltd.,
                     Sankt Augustin, Germany},
  keywords        = {Parallel computers -- Congresses; Virtual computer
                     systems -- Congresses.},
  pubcountry      = {Germany},
  treatment       = {P Practical},
}
%%% Parent proceedings record: Third International ACPC Conference,
%%% Klagenfurt, September 1996 (volume 1127). The citation key stays in plain
%%% ASCII; the editor name carries its accents as brace-wrapped TeX accent
%%% commands so that BibTeX treats each as a single letter. The series is
%%% given via the shared ser-LNCS macro, as in the other entries of this file
%%% that carry the same ISSN.
@Proceedings{Boszormenyi:1996:PCT,
editor = "L{\'a}szl{\'o} B{\"o}sz{\"o}rm{\'e}nyi",
booktitle = "{Parallel computation: Third International ACPC
Conference with special emphasis on parallel databases
and parallel I/O, Klagenfurt, Austria, September
23--25, 1996: proceedings}",
title = "{Parallel computation: Third International ACPC
Conference with special emphasis on parallel databases
and parallel I/O, Klagenfurt, Austria, September
23--25, 1996: proceedings}",
volume = "1127",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xi + 234",
year = "1996",
ISBN = "3-540-61695-0",
ISBN-13 = "978-3-540-61695-5",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA267.A1 L43 no.1127",
bibdate = "Wed Apr 16 07:34:31 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
acknowledgement = ack-nhfb,
keywords = "parallel processing (electronic computers) --
congresses",
}
%%% Parent proceedings record: Euro-Par '96, Lyon, August 1996 (two volumes,
%%% 1123--1124). The citation key stays in plain ASCII; the first editor's
%%% surname carries its accent as a brace-wrapped TeX accent command. The
%%% series is given via the shared ser-LNCS macro, as in the other entries of
%%% this file that carry the same ISSN.
@Proceedings{Bouge:1996:EPP,
editor = "Luc Boug{\'e} and P. Fraigniaud and A. Mignotte and Y.
Robert",
booktitle = "{Euro-Par '96 parallel processing: second
International Euro-Par Conference, Lyon, France, August
26--29, 1996: proceedings}",
title = "{Euro-Par '96 parallel processing: second
International Euro-Par Conference, Lyon, France, August
26--29, 1996: proceedings}",
volume = "1123--1124",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xxxiii + 842 (vol. 1), 926 (vol. 2)",
year = "1996",
ISBN = "3-540-61626-8 (vol. 1), 3-540-61627-6 (vol. 2)",
ISBN-13 = "978-3-540-61626-9 (vol. 1), 978-3-540-61627-6 (vol.
2)",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.58.I554 1996, QA267.A1 L43 no.1123-1124",
bibdate = "Sat Apr 19 16:34:54 MDT 1997",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Two volumes.",
series = ser-LNCS,
acknowledgement = ack-nhfb,
conflocation = "Lyon, France; 26-29 Aug. 1996",
conftitle = "Proceedings of European Conference on Parallel
Processing EURO-PAR '96",
corpsource = "Oak Ridge Nat. Lab., TN, USA",
keywords = "parallel processing (electronic computers) --
congresses",
pubcountry = "Germany",
treatment = "P Practical",
}
%%% Parent proceedings record: COORDINATION '96, Cesena, April 1996
%%% (number 1061 in the ser-LNCS series).
@Proceedings{Ciancarini:1996:CLM,
  editor          = {Paolo Ciancarini and Chris Hankin},
  booktitle       = {{Coordination languages and models: First
                     International Conference COORDINATION '96, Cesena,
                     Italy, April 15--17, 1996: proceedings}},
  title           = {{Coordination languages and models: First
                     International Conference COORDINATION '96, Cesena,
                     Italy, April 15--17, 1996: proceedings}},
  number          = {1061},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = {xi + 443},
  year            = {1996},
  isbn            = {3-540-61052-9},
  isbn-13         = {978-3-540-61052-6},
  issn            = {0302-9743 (print), 1611-3349 (electronic)},
  lccn            = {QA76.58.I52 1996},
  bibdate         = {Wed Aug 14 09:02:28 MDT 1996},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = ser-LNCS,
  acknowledgement = ack-nhfb,
}
%%% Parent proceedings record: PARA '95 (second international workshop on
%%% applied parallel computing), Lyngby, August 1995 (published 1996 as
%%% volume 1041 of the ser-LNCS series). The third editor's surname carries
%%% its accent as a brace-wrapped TeX accent command.
@Proceedings{Dongarra:1996:APC,
editor = "Jack J. Dongarra and Kay Madsen and Jerzy
Wa{\'s}niewski",
booktitle = "{Applied parallel computing: computations in physics,
chemistry, and engineering science: second
international workshop, PARA '95, Lyngby, Denmark,
August 21--24, 1995: proceedings}",
title = "{Applied parallel computing: computations in physics,
chemistry, and engineering science: second
international workshop, PARA '95, Lyngby, Denmark,
August 21--24, 1995: proceedings}",
volume = "1041",
publisher = pub-SV,
address = pub-SV:adr,
pages = "562",
year = "1996",
ISBN = "3-540-60902-4",
ISBN-13 = "978-3-540-60902-5",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.58.P35 1995",
bibdate = "Wed Aug 14 10:49:23 1996",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
acknowledgement = ack-nhfb,
sponsor = "Danish Computing Centre for Research and Education
Technical University of Denmark. Institute of
Mathematical Modeling Danish Natural Science Research
Council.",
}
%%% Parent proceedings record: HICSS-29, Wailea, January 1996 (five volumes,
%%% per the note field; pages is recorded as "various").
@Proceedings{El-Rewini:1996:PTN,
  editor          = {Hesham El-Rewini and Bruce D. Shriver},
  booktitle       = {{Proceedings of the Twenty-Ninth Hawaii International
                     Conference on System Sciences (HICSS-29): Wailea, HI,
                     USA, 3--6 January 1996}},
  title           = {{Proceedings of the Twenty-Ninth Hawaii International
                     Conference on System Sciences (HICSS-29): Wailea, HI,
                     USA, 3--6 January 1996}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  pages           = {various},
  year            = {1996},
  isbn            = {0-8186-7324-9},
  isbn-13         = {978-0-8186-7324-5},
  issn            = {1060-3425},
  lccn            = {????},
  bibdate         = {Wed Apr 16 14:12:08 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {Five volumes.},
  acknowledgement = ack-nhfb,
}
%%% Parent proceedings record: Third International Meeting on Fully
%%% Three-Dimensional Image Reconstruction in Radiology and Nuclear Medicine,
%%% Aix-les-Bains, July 1995 (published 1996).
@Proceedings{Grangeat:1996:PTI,
  editor          = {Pierre Grangeat and Jean-Louis Amans},
  booktitle       = {{Proceedings of the Third International Meeting on
                     Fully Three-Dimensional Image Reconstruction in
                     Radiology and Nuclear Medicine, held July 4--6, 1995 at
                     Domaine d'Aix-Marlioz, Aix-les-Bains, France}},
  title           = {{Proceedings of the Third International Meeting on
                     Fully Three-Dimensional Image Reconstruction in
                     Radiology and Nuclear Medicine, held July 4--6, 1995 at
                     Domaine d'Aix-Marlioz, Aix-les-Bains, France}},
  publisher       = pub-KLUWER,
  address         = pub-KLUWER:adr,
  pages           = {x + 315},
  year            = {1996},
  isbn            = {0-7923-4129-5},
  isbn-13         = {978-0-7923-4129-1},
  lccn            = {R857.T47 T485 1996},
  bibdate         = {Wed Apr 16 10:20:43 1997},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Grinstein:1996:VDE,
  editor =       "Georges G. Grinstein and Robert F. Erbacher",
  booktitle =    "{Visual data exploration and analysis III: 31
                 January--2 February, 1996, San Jose, California}",
  title =        "{Visual data exploration and analysis III: 31
                 January--2 February, 1996, San Jose, California}",
  volume =       "2656",
  publisher =    pub-SPIE,
  address =      pub-SPIE:adr,
  pages =        "ix + 404",
  year =         "1996",
  CODEN =        "PSISDG",
  ISBN =         "0-8194-2030-1",
  ISBN-13 =      "978-0-8194-2030-5",
  ISSN =         "0277-786X (print), 1996-756X (electronic)",
  LCCN =         "TS510.S63 v.2656",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       j-PROC-SPIE,
  acknowledgement = ack-nhfb,
  confsponsor =  "SPIE; Soc. Imaging Sci. and Technol",
  xxnote =       "Volume was previously recorded as ``2421 (or
                 2656??)''; the LCCN gives v.2656.",
}
@Proceedings{IEEE:1996:EIS,
  editor =       "{IEEE}",
  booktitle =    "{Eighth IEEE Symposium on Parallel and Distributed
                 Processing: October 23--26, 1996, New Orleans,
                 Louisiana}",
  title =        "{Eighth IEEE Symposium on Parallel and Distributed
                 Processing: October 23--26, 1996, New Orleans,
                 Louisiana}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xv + 618",
  year =         "1996",
  ISBN =         "0-8186-7683-3, 0-8186-7685-X (microfiche)",
  ISBN-13 =      "978-0-8186-7683-3, 978-0-8186-7685-7 (microfiche)",
  LCCN =         "QA76.58 .I42 1996",
  bibdate =      "Wed Apr 16 07:34:31 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE Computer Society Press order number PR07683. IEEE
                 Order Plan catalog number 96TB100088.",
  acknowledgement = ack-nhfb,
  keywords =     "electronic data processing -- distributed processing
                 -- congresses; parallel processing (electronic
                 computers) -- congresses",
}
@Proceedings{IEEE:1996:FSS,
  editor =       "{IEEE}",
  booktitle =    "{Frontiers'96, the Sixth Symposium on the Frontiers of
                 Massively Parallel Computation: October 27--31, 1996,
                 Annapolis, Maryland: proceedings}",
  title =        "{Frontiers'96, the Sixth Symposium on the Frontiers of
                 Massively Parallel Computation: October 27--31, 1996,
                 Annapolis, Maryland: proceedings}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xiv + 372",
  year =         "1996",
  ISBN =         "0-8186-7551-9",
  ISBN-13 =      "978-0-8186-7551-5",
  LCCN =         "QA76.58 .S95 1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 96TB100062.",
  acknowledgement = ack-nhfb,
  corpsource =   "Numerical Aerodynamic Simulation, NASA Ames Res.
                 Center, Moffett Field, CA, USA; Centro Svizzero di
                 Calcolo Sci., Manno, Switzerland",
  sponsororg =   "IEEE Comput. Soc.; NASA Goddard Space Flight Center;
                 USRA/CESDIS",
  treatment =    "P Practical",
}
@Proceedings{IEEE:1996:ICH,
  editor =       "{IEEE}",
  booktitle =    "{3rd International Conference on High Performance
                 Computing: proceedings, December 19--22, 1996,
                 Trivandrum, India}",
  title =        "{3rd International Conference on High Performance
                 Computing: proceedings, December 19--22, 1996,
                 Trivandrum, India}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xvi + 476",
  year =         "1996",
  ISBN =         "0-8186-7557-8",
  ISBN-13 =      "978-0-8186-7557-7",
  LCCN =         "QA76.88.I575 1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 96TB100074.",
  acknowledgement = ack-nhfb,
  conflocation = "Trivandrum, India; 19-22 Dec. 1996",
  conftitle =    "Proceedings of 3rd International Conference on High
                 Performance Computing (HiPC)",
  corpsource =   "Software Technol. Group, Swiss Center for Sci.
                 Comput., Manno, Switzerland; Div. of Math. and Comput.
                 Sci., Argonne Nat. Lab., IL, USA",
  sponsororg =   "IEEE Comput. Soc.; IEEE Comput. Soc. Tech. Committee
                 on Parallel Process.; ACM SIGARCH",
  treatment =    "P Practical",
}
@Proceedings{IEEE:1996:PFE,
  editor =       "{IEEE}",
  booktitle =    "{Proceedings of the fourth Euromicro Workshop on
                 Parallel and Distributed Processing (PDP '96): January
                 24--26, 1996, Braga, Portugal}",
  title =        "{Proceedings of the fourth Euromicro Workshop on
                 Parallel and Distributed Processing (PDP '96): January
                 24--26, 1996, Braga, Portugal}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xv + 551",
  year =         "1996",
  ISBN =         "0-8186-7376-1",
  ISBN-13 =      "978-0-8186-7376-4",
  LCCN =         "QA76.58 .E97 1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE order number PR07376.",
  acknowledgement = ack-nhfb,
  conflocation = "Braga, Portugal; 24-26 Jan. 1996",
  conftitle =    "Proceedings of 4th Euromicro Workshop on Parallel and
                 Distributed Processing",
  corpsource =   "Coimbra Univ., Portugal",
  keywords =     "electronic data processing -- distributed processing
                 -- congresses; parallel processing (electronic
                 computers) -- congresses; parallel programming
                 (computer science) -- congresses",
  treatment =    "P Practical",
}
@Proceedings{IEEE:1996:PFI,
  editor =       "{IEEE}",
  booktitle =    "{Proceedings of the Fifth IEEE International Symposium
                 on High Performance Distributed Computing, Syracuse,
                 NY, USA, 6--9 August 1996}",
  title =        "{Proceedings of the Fifth IEEE International Symposium
                 on High Performance Distributed Computing, Syracuse,
                 NY, USA, 6--9 August 1996}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xviii + 642",
  year =         "1996",
  ISBN =         "0-8186-7582-9",
  ISBN-13 =      "978-0-8186-7582-9",
  LCCN =         "QA 76.88 I52 1996",
  bibdate =      "Tue May 12 08:55:41 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number TB100069.",
  acknowledgement = ack-nhfb,
  corpsource =   "NSF Eng. Res. Center for Comput. Field Simulation,
                 Mississippi State Univ., MS, USA",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process.; Northeast Parallel Architectures Center; New
                 York State Center for Adv. Technol. Comput.
                 Applications and Software Eng. (CASE Center) at
                 Syracuse Univ.; Rome Lab",
  treatment =    "P Practical",
}
@Proceedings{IEEE:1996:PII,
  editor =       "{IEEE}",
  booktitle =    "{Proceedings of IPPS '96. The 10th International
                 Parallel Processing Symposium: Honolulu, HI, USA,
                 15--19 April 1996}",
  title =        "{Proceedings of IPPS '96. The 10th International
                 Parallel Processing Symposium: Honolulu, HI, USA,
                 15--19 April 1996}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xxviii + 903",
  year =         "1996",
  ISBN =         "0-8186-7255-2",
  ISBN-13 =      "978-0-8186-7255-2",
  LCCN =         "QA76.58 .I565 1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 96TB100038. IEEE Computer Society
                 Press order number PR07255.",
  acknowledgement = ack-nhfb,
  conflocation = "Honolulu, HI, USA; 15-19 April 1996",
  conftitle =    "Proceedings of International Conference on Parallel
                 Processing",
  corpsource =   "Mississippi State Univ., MS, USA; Inst. fur Inf.,
                 Tech. Univ. Munchen, Germany",
  sponsororg =   "IEEE Comput. Tech. Committee on Parallel Process.; ACM
                 SIGARCH",
  treatment =    "P Practical",
}
@Proceedings{IEEE:1996:PIS,
  editor =       "{IEEE}",
  booktitle =    "{Proceedings of 1996 IEEE Second International
                 Conference on Algorithms and Architectures for Parallel
                 Processing, ICA PP '96: June 11--13, 1996, Singapore}",
  title =        "{Proceedings of 1996 IEEE Second International
                 Conference on Algorithms and Architectures for Parallel
                 Processing, ICA PP '96: June 11--13, 1996, Singapore}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xix + 547",
  year =         "1996",
  ISBN =         "0-7803-3529-5 (softbound), 0-7803-3530-9
                 (microfiche)",
  ISBN-13 =      "978-0-7803-3529-5 (softbound), 978-0-7803-3530-1
                 (microfiche)",
  LCCN =         "QA76.58.I33 1996",
  bibdate =      "Wed Apr 16 07:34:31 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 96TH8204.",
  acknowledgement = ack-nhfb,
  keywords =     "electronic digital computers -- programming --
                 congresses; multiprocessors -- programming --
                 congresses; parallel processing (electronic computers)
                 -- congresses",
}
@Proceedings{IEEE:1996:PSI,
  editor =       "{IEEE}",
  booktitle =    "{Proceedings of the Seventh Israeli Conference on
                 Computer Systems and Software Engineering: June 12--13,
                 1996, Herzliya, Israel}",
  title =        "{Proceedings of the Seventh Israeli Conference on
                 Computer Systems and Software Engineering: June 12--13,
                 1996, Herzliya, Israel}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "viii + 151",
  year =         "1996",
  ISBN =         "0-8186-7536-5",
  ISBN-13 =      "978-0-8186-7536-2",
  LCCN =         "QA75.5 .I75 1996",
  bibdate =      "Wed Apr 16 07:34:31 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE Computer Society Press Order Number PR07536.",
  acknowledgement = ack-nhfb,
  keywords =     "software engineering -- Israel -- congresses; system
                 design -- congresses",
}
@Proceedings{IEEE:1996:PSM,
  editor =       "{IEEE}",
  booktitle =    "{Proceedings. Second MPI Developer's Conference: Notre
                 Dame, IN, USA, 1--2 July 1996}",
  title =        "{Proceedings. Second MPI Developer's Conference: Notre
                 Dame, IN, USA, 1--2 July 1996}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "ix + 207",
  year =         "1996",
  ISBN =         "0-8186-7533-0",
  ISBN-13 =      "978-0-8186-7533-1",
  LCCN =         "QA76.642 .M67 1996",
  bibdate =      "Tue May 12 08:56:04 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Distributed
                 Process",
}
@Proceedings{Jacoby:1996:ADA,
  editor =       "George H. Jacoby and Jeannette V. Barnes",
  booktitle =    "{Astronomical data analysis software and systems V:
                 meeting held at Tucson, Arizona, 23--25 October 1995}",
  title =        "{Astronomical data analysis software and systems V:
                 meeting held at Tucson, Arizona, 23--25 October 1995}",
  volume =       "101",
  publisher =    "Astronomical Society of the Pacific",
  address =      "San Francisco, CA, USA",
  pages =        "xxxvii + 607",
  year =         "1996",
  ISBN =         "????",
  ISBN-13 =      "????",
  ISSN =         "1080-7926",
  LCCN =         "QB51.3.E43 A87 1995",
  bibdate =      "Wed Apr 16 14:14:55 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       "Astronomical Society of the Pacific Conference
                 Series",
  acknowledgement = ack-nhfb,
}
@Proceedings{Jain:1996:IOP,
  editor =       "Ravi Jain and John Werth and James C. Browne",
  booktitle =    "{Input\slash output and parallel and distributed
                 computer systems}",
  title =        "{Input\slash output and parallel and distributed
                 computer systems}",
  publisher =    pub-KLUWER,
  address =      pub-KLUWER:adr,
  pages =        "xiv + 395",
  year =         "1996",
  ISBN =         "0-7923-9735-5",
  ISBN-13 =      "978-0-7923-9735-9",
  LCCN =         "QA76.58.I485 1996",
  bibdate =      "Mon Apr 21 11:26:01 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Proceedings{Li:1996:PSI,
  editor =       "G.-J. Li and D. F. Hsu and S. Horiguchi and B. Maggs",
  booktitle =    "{Proceedings. Second International Symposium on
                 Parallel Architectures, Algorithms, and Networks
                 (I-SPAN '96): June 12--14, 1996, Beijing, China}",
  title =        "{Proceedings. Second International Symposium on
                 Parallel Architectures, Algorithms, and Networks
                 (I-SPAN '96): June 12--14, 1996, Beijing, China}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xiii + 567",
  year =         "1996",
  ISBN =         "0-8186-7460-1",
  ISBN-13 =      "978-0-8186-7460-0",
  LCCN =         "QA76.58.I5673 1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 96TB100044.",
  acknowledgement = ack-nhfb,
  corpsource =   "Dept. of Comput. Sci., Australian Nat. Univ.,
                 Canberra, ACT, Australia",
  sponsororg =   "Chinese Nat. Res. Center for Intelligent Comput.
                 Syst.; IEEE Comput. Soc.; IEEE Comput. Soc. Tech.
                 Committee on Parallel Process.; Steering Committee of
                 the Chinese Nat. Hi-Tech Programme; Inf. Process. Soc.
                 Japan; Chinese Comput. Federation; IEICE Inf. and Syst.
                 Soc",
  treatment =    "P Practical",
}
@Proceedings{Li:1996:SIS,
  editor =       "Guo-Jie Li",
  booktitle =    "{Second International Symposium on Parallel
                 Architectures, Algorithms, and Networks (I-SPAN '96):
                 proceedings, June 12--14, 1996, Beijing, China}",
  title =        "{Second International Symposium on Parallel
                 Architectures, Algorithms, and Networks (I-SPAN '96):
                 proceedings, June 12--14, 1996, Beijing, China}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xi + 567",
  year =         "1996",
  ISBN =         "0-8186-7460-1",
  ISBN-13 =      "978-0-8186-7460-0",
  LCCN =         "QA76.58.I565 1996",
  bibdate =      "Sat Oct 21 15:20:00 2000",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 94TH0697-3.",
  acknowledgement = ack-nhfb,
  keywords =     "computer algorithms -- congresses; computer
                 architecture -- congresses; computer networks --
                 congresses; parallel processing (electronic computers)
                 -- congresses",
}
@Proceedings{Liddell:1996:HPC,
  editor =       "Heather Mary Liddell and A. Colbrook and B.
                 Hertzberger and P. Sloot",
  booktitle =    "{High-performance computing and networking:
                 international conference and exhibition, HPCN EUROPE
                 1996, Brussels, Belgium, April 15--19, 1996:
                 proceedings}",
  title =        "{High-performance computing and networking:
                 international conference and exhibition, HPCN EUROPE
                 1996, Brussels, Belgium, April 15--19, 1996:
                 proceedings}",
  volume =       "1067",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xxv + 1040",
  year =         "1996",
  ISBN =         "3-540-61142-8 (paperback)",
  ISBN-13 =      "978-3-540-61142-4 (paperback)",
  LCCN =         "QA76.88 .H52 1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  conflocation = "Brussels, Belgium; 15-19 April 1996",
  conftitle =    "High-Performance Computing and Networking.
                 International Conference and Exhibition HPCN Europe
                 1996",
  corpsource =   "Zentrum fur Paralleles Rechnen, Koln Univ., Germany;
                 German Nat. Res. Center for Inf. Technol., St.
                 Augustin, Germany; Dept. of Electron. and Comput. Sci.,
                 Southampton Univ., UK; Dept. of Inf., Basel Univ.,
                 Switzerland",
  keywords =     "computer networks -- congresses; supercomputers --
                 congresses",
  pubcountry =   "Germany",
  treatment =    "T Theoretical or Mathematical; P Practical",
}
@Proceedings{Reeves:1996:PIC,
  editor =       "A. Reeves",
  booktitle =    "{Proceedings of the 1996 International Conference on
                 Challenges for Parallel Processing, Ithaca, NY, USA,
                 August 12, 1996}",
  title =        "{Proceedings of the 1996 International Conference on
                 Challenges for Parallel Processing, Ithaca, NY, USA,
                 August 12, 1996}",
  volume =       "1",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xvi + 278 (vol. 1), xv + 173 (vol. 2), 230 (vol. 3)",
  year =         "1996",
  ISBN =         "0-8186-7623-X",
  ISBN-13 =      "978-0-8186-7623-9",
  LCCN =         "QA76.58 .I34 1996",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "Three volumes.",
  acknowledgement = ack-nhfb,
  conftitle =    "Proceedings of 25th International Conference on
                 Parallel Processing",
  corpsource =   "Comput. Sci. Div., Berkeley Univ., CA, USA",
  sponsororg =   "Int. Assoc. Comput. and Commun.; Pennsylvania State
                 Univ",
  treatment =    "P Practical",
  xxeditor =     "Howard Jay Segal",
}
@Proceedings{Silvester:1996:SEE,
  editor =       "P. P. Silvester",
  booktitle =    "{Software for electrical engineering analysis and
                 design: Third International Conference on Software for
                 Electrical Engineering Analysis and Design, Electrosoft
                 '96, Pisa, Italy}",
  title =        "{Software for electrical engineering analysis and
                 design: Third International Conference on Software for
                 Electrical Engineering Analysis and Design, Electrosoft
                 '96, Pisa, Italy}",
  publisher =    "Computational Mechanics Publications",
  address =      "Boston, MA, USA",
  pages =        "509",
  year =         "1996",
  ISBN =         "1-85312-395-1",
  ISBN-13 =      "978-1-85312-395-5",
  LCCN =         "TK5.I59 1996",
  bibdate =      "Wed Apr 16 07:34:31 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  keywords =     "electric engineering -- computer programs --
                 congresses",
}
@Proceedings{Szymanski:1996:LCR,
  editor =       "Boleslaw K. Szymanski and Balaram Sinharoy",
  booktitle =    "{Languages, Compilers and Run-Time Systems for
                 Scalable Computers, 22--24 May 1995, Troy, NY, USA}",
  title =        "{Languages, Compilers and Run-Time Systems for
                 Scalable Computers, 22--24 May 1995, Troy, NY, USA}",
  publisher =    pub-KLUWER,
  address =      pub-KLUWER:adr,
  pages =        "xiv + 335",
  year =         "1996",
  ISBN =         "0-7923-9635-9",
  ISBN-13 =      "978-0-7923-9635-2",
  LCCN =         "QA76.58.L37 1996",
  bibdate =      "Sun Dec 22 10:19:23 MST 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Proceedings{Toussaint:1996:AES,
  editor =       "Marcel Toussaint",
  booktitle =    "{Ada in Europe: Second International
                 Eurospace-Ada-Europe Symposium, Frankfurt\slash Main,
                 Germany, October 2--6, 1995: proceedings}",
  title =        "{Ada in Europe: Second International
                 Eurospace-Ada-Europe Symposium, Frankfurt\slash Main,
                 Germany, October 2--6, 1995: proceedings}",
  volume =       "1031",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xi + 455",
  year =         "1996",
  ISBN =         "3-540-60757-9",
  ISBN-13 =      "978-3-540-60757-1",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  LCCN =         "QA76.73.A35I57 1995",
  bibdate =      "Wed Aug 14 09:02:28 MDT 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  sponsor =      "Eurospace Ada-Europe.",
}
@Proceedings{Wasniewski:1996:APC,
  editor =       "Jerzy Wasniewski",
  booktitle =    "{Applied parallel computing: industrial computation
                 and optimization: Third International Workshop, PARA
                 '96, Lyngby, Denmark, August 18--21, 1996:
                 proceedings}",
  title =        "{Applied parallel computing: industrial computation
                 and optimization: Third International Workshop, PARA
                 '96, Lyngby, Denmark, August 18--21, 1996:
                 proceedings}",
  volume =       "1184",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xiii + 722",
  year =         "1996",
  ISBN =         "3-540-62095-8",
  ISBN-13 =      "978-3-540-62095-2",
  LCCN =         "QA76.58 .P35 1996",
  bibdate =      "Wed Apr 16 07:34:31 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  keywords =     "parallel processing (electronic computers) --
                 congresses",
}
@Proceedings{Yetongnon:1996:PII,
  editor =       "K. Yetongnon and S. Hariri",
  booktitle =    "{Proceedings of the ISCA International Conference.
                 Parallel and Distributed Computing Systems: Dijon,
                 France, 25--27 September 1996 (PDCS '96: 9th)}",
  title =        "{Proceedings of the ISCA International Conference.
                 Parallel and Distributed Computing Systems: Dijon,
                 France, 25--27 September 1996 (PDCS '96: 9th)}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "????",
  year =         "1996",
  ISBN =         "????",
  ISBN-13 =      "????",
  LCCN =         "????",
  bibdate =      "Wed Apr 16 14:20:56 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Proceedings{Zaky:1996:PDT,
  editor =       "Amr Zaky and Ted Lewis",
  booktitle =    "Tools and environments for parallel and distributed
                 systems",
  title =        "{Program development tools and environments for
                 parallel and distributed systems: Session; 28th Hawaii
                 international conference on system sciences --- 1995}",
  volume =       "2",
  publisher =    pub-KLUWER,
  address =      pub-KLUWER:adr,
  pages =        "viii + 305",
  year =         "1996",
  ISBN =         "0-7923-9675-8",
  ISBN-13 =      "978-0-7923-9675-8",
  LCCN =         "QA76.58.T65 1996",
  bibdate =      "Wed Aug 14 09:02:28 MDT 1996",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       "Kluwer International Series in Software Engineering",
  acknowledgement = ack-nhfb,
}
@Proceedings{ACM:1997:PPS,
  editor =       "{ACM}",
  booktitle =    "{PASCO '97. Proceedings of the second international
                 symposium on parallel symbolic computation, July
                 20--22, 1997, Maui, HI}",
  title =        "{PASCO '97. Proceedings of the second international
                 symposium on parallel symbolic computation, July
                 20--22, 1997, Maui, HI}",
  publisher =    pub-ACM,
  address =      pub-ACM:adr,
  pages =        "????",
  year =         "1997",
  ISBN =         "????",
  ISBN-13 =      "????",
  LCCN =         "????",
  bibdate =      "Thu Mar 12 07:30:53 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  xxnote =       "Check editor; proceedings not yet in LC, UC-Melvyl, or
                 OCLC??",
}
@Proceedings{ACM:1997:SHP,
  editor =       "{ACM}",
  booktitle =    "{SC'97: High Performance Networking and Computing:
                 Proceedings of the 1997 ACM\slash IEEE SC97 Conference:
                 November 15--21, 1997, San Jose, California, USA}",
  title =        "{SC'97: High Performance Networking and Computing:
                 Proceedings of the 1997 ACM\slash IEEE SC97 Conference:
                 November 15--21, 1997, San Jose, California, USA}",
  publisher =    pub-ACM # " and " # pub-IEEE,
  address =      pub-ACM:adr # " and " # pub-IEEE:adr,
  pages =        "vii + 159",
  year =         "1997",
  ISBN =         "0-89791-985-8",
  ISBN-13 =      "978-0-89791-985-2",
  LCCN =         "QA76.9.A25 A265 1997",
  bibdate =      "Sat Mar 21 09:10:00 1998",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "ACM SIGARCH order number 415972. IEEE Computer Society
                 Press order number RS00160.",
  URL =          "http://www.acm.org/pubs/contents/proceedings/commsec/266741/;
                 http://www.supercomp.org/sc97/proceedings/",
  acknowledgement = ack-nhfb,
  xxnote =       "Check ISBN: UC/Melvyl has this one for ``Proceedings /
                 Second ACM Workshop on Role-Based Access Control,
                 Fairfax, Virginia, USA, November 6--7, 1997''.",
}
@Proceedings{Boisvert:1997:QNS,
  editor =       "R. F. Boisvert",
  booktitle =    "{Quality of numerical software: assessment and
                 enhancement / proceedings of the IFIP TC2/WG2.5 Working
                 Conference on the Quality of Numerical Software,
                 Assessment and Enhancement, Oxford, United Kingdom,
                 8--12 July 1996}",
  title =        "{Quality of numerical software: assessment and
                 enhancement / proceedings of the IFIP TC2/WG2.5 Working
                 Conference on the Quality of Numerical Software,
                 Assessment and Enhancement, Oxford, United Kingdom,
                 8--12 July 1996}",
  publisher =    pub-CHAPMAN-HALL,
  address =      pub-CHAPMAN-HALL:adr,
  pages =        "vii + 384",
  year =         "1997",
  ISBN =         "0-412-80530-8",
  ISBN-13 =      "978-0-412-80530-1",
  LCCN =         "QA297 .I35 1996",
  bibdate =      "Thu Sep 16 09:48:36 MDT 1999",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  sponsor =      "IFIP; Technical Committee 2/Working Group 2.5.",
}
@Proceedings{Bubak:1997:RAP,
  editor =       "Marian Bubak and J. J. Dongarra and Jerzy Wasniewski",
  booktitle =    "{Recent advances in parallel virtual machine and
                 message passing interface: 4th European PVM\slash MPI
                 user's group meeting Cracow, Poland, November 3--5,
                 1997: proceedings}",
  title =        "{Recent advances in parallel virtual machine and
                 message passing interface: 4th European PVM\slash MPI
                 user's group meeting Cracow, Poland, November 3--5,
                 1997: proceedings}",
  volume =       "1332",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xv + 518",
  year =         "1997",
  CODEN =        "LNCSD9",
  ISBN =         "3-540-63697-8 (paperback)",
  ISBN-13 =      "978-3-540-63697-7 (paperback)",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  LCCN =         "QA76.58.E973 1997",
  bibdate =      "Mon Nov 24 09:49:54 MST 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  keywords =     "Computer networks -- Congresses.; Parallel computers
                 -- Congresses.",
}
@Proceedings{IEEE:1997:APD,
  editor =       "{IEEE}",
  booktitle =    "{Advances in parallel and distributed computing: March
                 19--21, 1997, Shanghai, China: proceedings}",
  title =        "{Advances in parallel and distributed computing: March
                 19--21, 1997, Shanghai, China: proceedings}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xii + 426",
  year =         "1997",
  ISBN =         "0-8186-7876-3 (paperback and case), 0-8186-7878-X
                 (microfiche)",
  ISBN-13 =      "978-0-8186-7876-9 (paperback and case),
                 978-0-8186-7878-3 (microfiche)",
  LCCN =         "QA76.58 .A4 1997",
  bibdate =      "Wed Apr 16 07:34:31 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
  keywords =     "electronic data processing -- distributed processing
                 -- congresses; parallel processing (electronic
                 computers) -- congresses",
}
@Proceedings{IEEE:1997:PIP,
  editor =       "{IEEE}",
  booktitle =    "{Proceedings. 11th International Parallel Processing
                 Symposium, April 1--5, 1997, Geneva, Switzerland}",
  title =        "{Proceedings. 11th International Parallel Processing
                 Symposium, April 1--5, 1997, Geneva, Switzerland}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xxi + 765",
  year =         "1997",
  ISBN =         "0-8186-7793-7",
  ISBN-13 =      "978-0-8186-7793-9",
  LCCN =         "QA76.58 .I56 1997",
  bibdate =      "Thu May 21 19:02:04 MDT 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 97TB100107. IEEE Computer Society
                 Press order number PR07792.",
  acknowledgement = ack-nhfb,
  conftitle =    "Proceedings 11th International Parallel Processing
                 Symposium",
  corpsource =   "Dept. of Comput. Sci., Utah Univ., Salt Lake City, UT,
                 USA",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Parallel
                 Process.; ACM SIGARCH; Eur. Assoc. Theor. Comput. Sci.
                 (EATCS); Swiss Special Interest Group on Parallelism
                 (SIPAR); SPEEDUP Soc",
  treatment =    "P Practical",
}
@Proceedings{IEEE:1997:TIS,
  editor =       "{IEEE}",
  booktitle =    "{Third International Symposium on High-Performance
                 Computer Architecture: proceedings, February 1--5,
                 1997, San Antonio, Texas}",
  title =        "{Third International Symposium on High-Performance
                 Computer Architecture: proceedings, February 1--5,
                 1997, San Antonio, Texas}",
  publisher =    pub-IEEE,
  address =      pub-IEEE:adr,
  pages =        "xi + 353",
  year =         "1997",
  ISBN =         "0-8186-7764-3",
  ISBN-13 =      "978-0-8186-7764-9",
  LCCN =         "QA76.9.A73I566 1997",
  bibdate =      "Sat Apr 19 16:34:54 MDT 1997",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "IEEE catalog number 97TB100094.",
  acknowledgement = ack-nhfb,
  corpsource =   "Hong Kong Univ., Hong Kong",
  sponsororg =   "IEEE Comput. Soc. Tech. Committee on Comput.
                 Archit",
  treatment =    "P Practical",
}
@Proceedings{ACM:1998:AWJ,
  editor =       "{ACM}",
  booktitle =    "{ACM 1998 Workshop on Java for High-Performance
                 Network Computing}",
  title =        "{ACM 1998 Workshop on Java for High-Performance
                 Network Computing}",
  publisher =    pub-ACM,
  address =      pub-ACM:adr,
  pages =        "????",
  year =         "1998",
  ISBN =         "????",
  ISBN-13 =      "????",
  LCCN =         "????",
  bibdate =      "Thu Apr 27 10:40:59 2000",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "Possibly unpublished, except electronically.",
  URL =          "http://www.cs.ucsb.edu/conferences/java98/program.html",
  acknowledgement = ack-nhfb,
}
@Proceedings{ACM:1998:SHP,
  editor =       "{ACM}",
  booktitle =    "{SC'98: High Performance Networking and Computing:
                 Proceedings of the 1998 ACM\slash IEEE SC98 Conference:
                 Orange County Convention Center, Orlando, Florida, USA,
                 November 7--13, 1998}",
  title =        "{SC'98: High Performance Networking and Computing:
                 Proceedings of the 1998 ACM\slash IEEE SC98 Conference:
                 Orange County Convention Center, Orlando, Florida, USA,
                 November 7--13, 1998}",
  publisher =    pub-ACM # " and " # pub-IEEE,
  address =      pub-ACM:adr # " and " # pub-IEEE:adr,
  pages =        "????",
  year =         "1998",
  ISBN =         "????",
  ISBN-13 =      "????",
  LCCN =         "????",
  bibdate =      "Wed Oct 07 08:51:34 1998",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.supercomp.org/sc98/papers/",
  acknowledgement = ack-nhfb,
}
@Proceedings{Alexandrov:1998:RAP,
  editor =       "Vassil Alexandrov and J. J. Dongarra",
  booktitle =    "{Recent advances in parallel virtual machine and
                 message passing interface: 5th European PVM\slash MPI
                 User's Group Meeting, Liverpool, UK, September 7--9,
                 1998: proceedings}",
  title =        "{Recent advances in parallel virtual machine and
                 message passing interface: 5th European PVM\slash MPI
                 User's Group Meeting, Liverpool, UK, September 7--9,
                 1998: proceedings}",
  volume =       "1497",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xii + 412",
  year =         "1998",
  ISBN =         "3-540-65041-5 (softcover)",
  ISBN-13 =      "978-3-540-65041-6 (softcover)",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  LCCN =         "QA267.A1 L43 no.1497",
  bibdate =      "Mon May 3 11:00:13 MDT 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  note =         "Jointly sponsored by the Computer Science Dept.,
                 University of Liverpool and Oak Ridge National
                 Laboratory.",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  keywords =     "data transmission systems -- congresses; parallel
                 computers -- congresses; virtual computer systems --
                 congresses",
}
@Proceedings{ACM:1999:SPO,
  editor =       "{ACM}",
  booktitle =    "{SC'99: Oregon Convention Center 777 NE Martin Luther
                 King Jr. Boulevard, Portland, Oregon, November 11--18,
                 1999}",
  title =        "{SC'99: Oregon Convention Center 777 NE Martin Luther
                 King Jr. Boulevard, Portland, Oregon, November 11--18,
                 1999}",
  publisher =    pub-ACM # " and " # pub-IEEE,
  address =      pub-ACM:adr # " and " # pub-IEEE:adr,
  pages =        "????",
  year =         "1999",
  ISBN =         "????",
  ISBN-13 =      "????",
  LCCN =         "????",
  bibdate =      "Thu Feb 24 09:35:00 2000",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  acknowledgement = ack-nhfb,
}
@Proceedings{Dongarra:1999:RAP,
  editor =       "J. J. Dongarra and E. Luque and Tomas Margalef",
  booktitle =    "{Recent advances in parallel virtual machine and
                 message passing interface: 6th European PVM\slash MPI
                 Users' Group Meeting, Barcelona, Spain, September
                 26--29, 1999: proceedings}",
  title =        "{Recent advances in parallel virtual machine and
                 message passing interface: 6th European PVM\slash MPI
                 Users' Group Meeting, Barcelona, Spain, September
                 26--29, 1999: proceedings}",
  volume =       "1697",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xvii + 551",
  year =         "1999",
  ISBN =         "3-540-66549-8 (softcover)",
  ISBN-13 =      "978-3-540-66549-6 (softcover)",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  LCCN =         "QA76.58 E973 1999",
  bibdate =      "Wed Dec 8 06:34:56 MST 1999",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  alttitle =     "PVM/MPI '99",
  keywords =     "Data transmission systems; Parallel computers; Virtual
                 computer systems",
}
@Proceedings{ACM:2000:SHP,
  editor =       "{ACM}",
  booktitle =    "{SC2000: High Performance Networking and Computing.
                 Dallas Convention Center, Dallas, TX, USA, November
                 4--10, 2000}",
  title =        "{SC2000: High Performance Networking and Computing.
                 Dallas Convention Center, Dallas, TX, USA, November
                 4--10, 2000}",
  publisher =    pub-ACM # " and " # pub-IEEE,
  address =      pub-ACM:adr # " and " # pub-IEEE:adr,
  pages =        "????",
  year =         "2000",
  ISBN =         "????",
  ISBN-13 =      "????",
  LCCN =         "????",
  bibdate =      "Thu Feb 24 09:35:00 2000",
  bibsource =    "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
                 http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  URL =          "http://www.sc2000.org/proceedings/info/fp.pdf",
  acknowledgement = ack-nhfb,
}
@Proceedings{Dongarra:2000:RAP,
  editor =       "J. J. Dongarra and Peter Kacsuk and Norbert
                 Podhorszki",
  booktitle =    "{Recent advances in parallel virtual machine and
                 message passing interface: 7th European PVM\slash MPI
                 Users' Group Meeting, Balatonfured, Hungary, September
                 10--13, 2000: proceedings}",
  title =        "{Recent advances in parallel virtual machine and
                 message passing interface: 7th European PVM\slash MPI
                 Users' Group Meeting, Balatonfured, Hungary, September
                 10--13, 2000: proceedings}",
  volume =       "1908",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xv + 364",
  year =         "2000",
  ISBN =         "3-540-41010-4 (softcover)",
  ISBN-13 =      "978-3-540-41010-2 (softcover)",
  ISSN =         "0302-9743 (print), 1611-3349 (electronic)",
  bibdate =      "Mon Oct 16 18:31:56 MDT 2000",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       ser-LNCS,
  acknowledgement = ack-nhfb,
  keywords =     "data transmission systems -- congresses; parallel
                 computers -- congresses; virtual computer systems --
                 congresses",
}
@Proceedings{Engquist:2000:SVG,
  editor =       "Bj{\"o}rn Engquist",
  booktitle =    "{Simulation and visualization on the grid:
                 Parallelldatorcentrum, Kungl. Tekniska H{\"o}gskolan,
                 seventh annual conference, Stockholm, Sweden, December
                 1999: proceedings}",
  title =        "{Simulation and visualization on the grid:
                 Parallelldatorcentrum, Kungl. Tekniska H{\"o}gskolan,
                 seventh annual conference, Stockholm, Sweden, December
                 1999: proceedings}",
  volume =       "13",
  publisher =    pub-SV,
  address =      pub-SV:adr,
  pages =        "xiii + 300",
  year =         "2000",
  ISBN =         "3-540-67264-8",
  ISBN-13 =      "978-3-540-67264-7",
  ISSN =         "1439-7358",
  LCCN =         "QA76.9.C65 S535 2000",
  bibdate =      "Wed Oct 18 10:32:22 2000",
  bibsource =    "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
  series =       ser-LNCSE,
  acknowledgement = ack-nhfb,
  keywords =     "Computer simulation -- Congresses; Visualization --
                 Congresses",
}
@Book{Koniges:2000:ISP,
  editor = {Alice E. Koniges},
  title = {{Industrial Strength Parallel Computing}},
  booktitle = {{Industrial Strength Parallel Computing}},
  publisher = pub-MORGAN-KAUFMANN,
  address = pub-MORGAN-KAUFMANN:adr,
  pages = {xxv + 597},
  year = {2000},
  ISBN = {1-55860-540-1},
  ISBN-13 = {978-1-55860-540-4},
  LCCN = {QA76.58 .I483 2000},
  bibdate = {Fri Feb 04 18:30:40 2000},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Reynders:2000:IPI,
  editor = {John Reynders and Alexander V. Veidenbaum},
  title = {{ICS '00: Proceedings of the 14th international
    conference on Supercomputing: Santa Fe, New Mexico,
    USA, May 8--11, 2000}},
  booktitle = {{ICS '00: Proceedings of the 14th international
    conference on Supercomputing: Santa Fe, New Mexico,
    USA, May 8--11, 2000}},
  publisher = pub-ACM,
  address = pub-ACM:adr,
  bookpages = {xi + 509},
  pages = {xi + 509},
  year = {2000},
  DOI = {https://doi.org/10.1145/335231},
  ISBN = {1-58113-270-0},
  ISBN-13 = {978-1-58113-270-0},
  LCCN = {QA76.88 .I573 2000},
  bibdate = {Fri Jul 27 05:22:06 2001},
  bibsource = {http://www.acm.org/pubs/contents/proceedings/supercomputing/335231/;
    http://www.math.utah.edu/pub/tex/bib/fparith.bib;
    http://www.math.utah.edu/pub/tex/bib/java2000.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {https://dl.acm.org/doi/proceedings/10.1145/335231},
  acknowledgement = ack-nhfb,
  keywords = {AS/400; ESA/390; IA-64; Java Virtual Machine (JVM);
    RS/6000},
}
@Proceedings{USENIX:2000:PAL,
  editor = {{USENIX}},
  title = {{Proceedings of the 4th Annual Linux Showcase and
    Conference, Atlanta, October 10--14, 2000, Atlanta,
    Georgia, USA}},
  booktitle = {{Proceedings of the 4th Annual Linux Showcase and
    Conference, Atlanta, October 10--14, 2000, Atlanta,
    Georgia, USA}},
  publisher = pub-USENIX,
  address = pub-USENIX:adr,
  pages = {394},
  year = {2000},
  ISBN = {1-880446-17-0},
  ISBN-13 = {978-1-880446-17-1},
  LCCN = {????},
  bibdate = {Wed Oct 16 06:06:36 2002},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  URL = {http://www.usenix.org/publications/library/proceedings/als2000/},
  acknowledgement = ack-nhfb,
}
@Proceedings{ACM:2001:SHP,
  editor = {{ACM}},
  title = {{SC2001: High Performance Networking and Computing.
    Denver, CO, November 10--16, 2001}},
  booktitle = {{SC2001: High Performance Networking and Computing.
    Denver, CO, November 10--16, 2001}},
  publisher = pub-ACM # " and " # pub-IEEE,
  address = pub-ACM:adr # " and " # pub-IEEE:adr,
  pages = {????},
  year = {2001},
  ISBN = {1-58113-293-X},
  ISBN-13 = {978-1-58113-293-9},
  LCCN = {????},
  bibdate = {Thu Feb 21 18:29:36 2002},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Cotronis:2001:RAP,
  editor = {Yiannis Cotronis and J. J. Dongarra},
  title = {{Recent advances in parallel virtual machine and
    message passing interface: 8th European PVM\slash MPI
    Users' Group Meeting, Santorini\slash Thera, Greece,
    September 23--26, 2001: proceedings}},
  booktitle = {{Recent advances in parallel virtual machine and
    message passing interface: 8th European PVM\slash MPI
    Users' Group Meeting, Santorini\slash Thera, Greece,
    September 23--26, 2001: proceedings}},
  volume = {2131},
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = {xv + 438},
  year = {2001},
  ISBN = {3-540-42609-4 (paperback)},
  ISBN-13 = {978-3-540-42609-7 (paperback)},
  LCCN = {QA76.58 E975 2001; QA267.A1 L43 no.2131},
  bibdate = {Thu Jan 17 11:49:19 MST 2002},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = ser-LNCS # " and " # ser-LNAI,
  URL = {http://link.springer-ny.com/link/service/series/0558/tocs/t2131.htm},
  acknowledgement = ack-nhfb,
  keywords = {data transmission systems -- congresses; parallel
    computers -- congresses; virtual computer systems --
    congresses},
}
@Proceedings{Eigenmann:2001:OSM,
  editor = {Rudolf Eigenmann and Michael J. Voss},
  title = {{OpenMP shared memory parallel programming:
    International Workshop on OpenMP Applications and
    Tools, WOMPAT 2001, West Lafayette, IN, USA, July
    30--31, 2001: Proceedings}},
  booktitle = {{OpenMP shared memory parallel programming:
    International Workshop on OpenMP Applications and
    Tools, WOMPAT 2001, West Lafayette, IN, USA, July
    30--31, 2001: Proceedings}},
  volume = {2104},
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = {x + 184},
  year = {2001},
  CODEN = {LNCSD9},
  DOI = {????},
  ISBN = {3-540-42346-X (paperback)},
  ISBN-13 = {978-3-540-42346-1 (paperback)},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  LCCN = {QA76.642 .I589 2001; QA267.A1 L43 no.2104},
  bibdate = {Thu Jan 17 11:49:19 MST 2002},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
    http://www.math.utah.edu/pub/tex/bib/multithreading.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = ser-LNCS,
  URL = {http://link.springer-ny.com/link/service/series/0558/tocs/t2104.htm},
  acknowledgement = ack-nhfb,
  keywords = {parallel programming (computer science) --
    congresses},
}
@Proceedings{IEEE:2002:STI,
  editor = {{IEEE}},
  title = {{SC2002: From Terabytes to Insight. Proceedings of the
    IEEE ACM SC 2002 Conference, November 16--22, 2002,
    Baltimore, MD, USA}},
  booktitle = {{SC2002: From Terabytes to Insight. Proceedings of the
    IEEE ACM SC 2002 Conference, November 16--22, 2002,
    Baltimore, MD, USA}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {????},
  year = {2002},
  ISBN = {0-7695-1524-X},
  ISBN-13 = {978-0-7695-1524-3},
  LCCN = {????},
  bibdate = {Thu Feb 21 18:29:36 2002},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Kranzlmuller:2002:RAP,
editor = "Dieter Kranzlm{\"u}ller",
booktitle = "{Recent advances in parallel virtual machine and
message passing interface: 9th European PVM\slash MPI
Users' Group Meeting, Linz, Austria, September
29--October 2, 2002: proceedings}",
title = "{Recent advances in parallel virtual machine and
message passing interface: 9th European PVM\slash MPI
Users' Group Meeting, Linz, Austria, September
29--October 2, 2002: proceedings}",
volume = "2474",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xvi + 462",
year = "2002",
ISBN = "3-540-44296-0 (softcover)",
ISBN-13 = "978-3-540-44296-7 (softcover)",
LCCN = "QA76.58 .E975 2002",
bibdate = "Sun Dec 1 08:06:09 MST 2002",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "Also available via the World Wide Web",
series = ser-LNCS,
acknowledgement = ack-nhfb,
keywords = "data transmission systems -- congresses; parallel
computers -- congresses; virtual computer systems --
congresses",
}
@Proceedings{Oldehoeft:2002:SIS,
  editor = {Rod Oldehoeft},
  title = {{Special issue on software for high-performance
    systems: papers from the symposium of the Los Alamos
    Computer Science Institute, held in Santa Fe, NM, USA
    on October 15--18, 2001}},
  booktitle = {{Special issue on software for high-performance
    systems: papers from the symposium of the Los Alamos
    Computer Science Institute, held in Santa Fe, NM, USA
    on October 15--18, 2001}},
  volume = {23(1)},
  publisher = pub-KLUWER,
  address = pub-KLUWER:adr,
  pages = {128},
  year = {2002},
  CODEN = {JOSUED},
  ISSN = {0920-8542 (print), 1573-0484 (electronic)},
  ISSN-L = {0920-8542},
  bibdate = {Wed Jan 14 07:13:03 2004},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = {The journal of supercomputing},
  acknowledgement = ack-nhfb,
}
@Proceedings{ACM:2003:SII,
  editor = {{ACM}},
  title = {{SC2003: Igniting Innovation. Phoenix, AZ, November
    15--21, 2003}},
  booktitle = {{SC2003: Igniting Innovation. Phoenix, AZ, November
    15--21, 2003}},
  publisher = pub-ACM # " and " # pub-IEEE,
  address = pub-ACM:adr # " and " # pub-IEEE:adr,
  pages = {????},
  year = {2003},
  ISBN = {1-58113-695-1},
  ISBN-13 = {978-1-58113-695-1},
  LCCN = {????},
  bibdate = {Thu Feb 21 18:29:36 2003},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Dongarra:2003:RAP,
editor = "Jack Dongarra and Domenico Laforenza and Salvatore
Orlando",
booktitle = "{Recent advances in parallel virtual machine and
message passing interface: 10th European PVM\slash MPI
User's Group Meeting, Venice, Italy, September
29--October 2, 2003: Proceedings}",
title = "{Recent advances in parallel virtual machine and
message passing interface: 10th European PVM\slash MPI
User's Group Meeting, Venice, Italy, September
29--October 2, 2003: Proceedings}",
volume = "2840",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xviii + 693",
year = "2003",
CODEN = "LNCSD9",
ISBN = "3-540-20149-1",
ISBN-13 = "978-3-540-20149-6",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.58 .E973 2003",
bibdate = "Tue Jan 13 19:17:43 2004",
bibsource = "http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
URL = "http://link.springer-ny.com/link/service/series/0558/tocs/t2840.htm",
acknowledgement = ack-nhfb,
}
@Proceedings{Voss:2003:OSM,
editor = "Michael J. Voss",
booktitle = "{OpenMP shared memory parallel programming:
International Workshop on OpenMP Applications and
Tools, WOMPAT 2003, Toronto, Canada, June 26--27, 2003:
Proceedings}",
title = "{OpenMP shared memory parallel programming:
International Workshop on OpenMP Applications and
Tools, WOMPAT 2003, Toronto, Canada, June 26--27, 2003:
Proceedings}",
volume = "2716",
publisher = pub-SV,
address = pub-SV:adr,
pages = "viii + 270",
year = "2003",
CODEN = "LNCSD9",
DOI = "????",
ISBN = "3-540-40435-X (softcover)",
ISBN-13 = "978-3-540-40435-4 (softcover)",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.642 .I589 2003",
bibdate = "Thu Aug 21 09:09:03 MDT 2003",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
URL = "http://link.springer-ny.com/link/service/series/0558/tocs/t2716.htm;
http://www.springerlink.com/openurl.asp?genre=issue&issn=0302-9743&volume=2716",
acknowledgement = ack-nhfb,
keywords = "parallel programming (computer science) --
congresses",
}
@Proceedings{ACM:2004:SHP,
  editor = {{ACM}},
  title = {{SC 2004: High Performance Computing, Networking and
    Storage: Bridging communities: Proceedings of the
    IEEE\slash ACM Supercomputing 2004 Conference,
    Pittsburgh, PA, November 6--12, 2004}},
  booktitle = {{SC 2004: High Performance Computing, Networking and
    Storage: Bridging communities: Proceedings of the
    IEEE\slash ACM Supercomputing 2004 Conference,
    Pittsburgh, PA, November 6--12, 2004}},
  publisher = pub-ACM # " and " # pub-IEEE,
  address = pub-ACM:adr # " and " # pub-IEEE:adr,
  pages = {????},
  year = {2004},
  ISBN = {0-7695-2153-3},
  ISBN-13 = {978-0-7695-2153-4},
  LCCN = {????},
  bibdate = {Tue Dec 27 08:08:01 2005},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Kranzlmuller:2004:RAP,
editor = "Dieter Kranzlm{\"u}ller and P{\'e}ter Kacsuk and Jack
J. Dongarra",
booktitle = "{Recent Advances in Parallel Virtual Machine and
Message Passing Interface: 11th European PVM\slash MPI
Users' Group Meeting, Budapest, Hungary, September
19--22, 2004: proceedings}",
title = "{Recent Advances in Parallel Virtual Machine and
Message Passing Interface: 11th European PVM\slash MPI
Users' Group Meeting, Budapest, Hungary, September
19--22, 2004: proceedings}",
volume = "3241",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xiii + 452",
year = "2004",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/b100820",
ISBN = "3-540-23163-3",
ISBN-13 = "978-3-540-23163-9",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.58 .E973 2004",
bibdate = "Sat Jun 4 05:55:05 MDT 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
z3950.loc.gov:7090/Voyager",
series = ser-LNCS,
URL = "http://www.springerlink.com/openurl.asp?genre=issue&issn=0302-9743&volume=3241;
http://www.springerlink.com/openurl.asp?genre=volume&id=doi:10.1007/b100820",
acknowledgement = ack-nhfb,
meetingname = "European PVM/MPI Users' Group Meeting (11th: 2004:
Budapest, Hungary)",
subject = "Parallel computers; Congresses; Virtual computer
systems; Congresses; Data transmission systems;
Congresses",
}
@Proceedings{ACM:2005:PAI,
editor = "{ACM}",
booktitle = "{Proceedings of the 2005 ACM\slash IEEE conference on
Supercomputing 2005, Seattle, WA, November 12--18,
2005}",
title = "{Proceedings of the 2005 ACM\slash IEEE conference on
Supercomputing 2005, Seattle, WA, November 12--18,
2005}",
publisher = pub-ACM # " and " # pub-IEEE,
address = pub-ACM:adr # " and " # pub-IEEE:adr,
pages = "????",
year = "2005",
ISBN = "1-59593-061-2",
ISBN-13 = "978-1-59593-061-3",
LCCN = "????",
bibdate = "Tue Dec 27 08:08:01 2005",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
acknowledgement = ack-nhfb,
}
@Proceedings{Beyer:2005:GEC,
editor = "Hans-Georg Beyer and others",
booktitle = "{Genetic and Evolutionary Computation Conference:
GECCO 2005, June 25--29, 2005 (Saturday--Wednesday)
Washington, DC, USA}",
title = "{Genetic and Evolutionary Computation Conference:
GECCO 2005, June 25--29, 2005 (Saturday--Wednesday)
Washington, DC, USA}",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "????",
year = "2005",
ISBN = "1-59593-010-8 (paperback)",
ISBN-13 = "978-1-59593-010-1 (paperback)",
LCCN = "QA76.623 .G44 2005",
bibdate = "Tue Mar 6 06:24:38 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/prng.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib;
z3950.loc.gov:7090/Voyager",
note = "ACM order number 910050.",
acknowledgement = ack-nhfb,
subject = "Genetic algorithms; Data processing; Congresses;
Parallel processing (Electronic computers)",
}
@Proceedings{Chapman:2005:SMP,
  editor = {Barbara M. Chapman},
  title = {{Shared memory parallel programming with OpenMP: 5th
    International Workshop on OpenMP Applications and
    Tools, WOMPAT 2004, Houston, TX, USA, May 17--18, 2004:
    Revised selected papers}},
  booktitle = {{Shared memory parallel programming with OpenMP: 5th
    International Workshop on OpenMP Applications and
    Tools, WOMPAT 2004, Houston, TX, USA, May 17--18, 2004:
    Revised selected papers}},
  volume = {3349},
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = {x + 147},
  year = {2005},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/b105895},
  ISBN = {3-540-24560-X},
  ISBN-13 = {978-3-540-24560-5},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  LCCN = {QA76 .A1 L42 NO.3349},
  bibdate = {Thu Jun 2 07:26:02 MDT 2005},
  bibsource = {clavis.ucalgary.ca:2200/UNICORN;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = ser-LNCS,
  URL = {http://www.springerlink.com/openurl.asp?genre=issue&issn=0302-9743&volume=3349;
    http://www.springerlink.com/openurl.asp?genre=volume&id=doi:10.1007/b105895},
  acknowledgement = ack-nhfb,
  meetingname = {International Workshop on OpenMP Applications and
    Tools (2004: Houston, Tex.)},
  subject = {Parallel programming (Computer science); Congresses},
}
@Proceedings{DiMartino:2005:RAP,
editor = "Beniamino {Di Martino} and Dieter Kranzlm{\"u}ller and
J. J. Dongarra",
booktitle = "{Recent advances in parallel virtual machine and
message passing interface: 12th European PVM\slash MPI
User's Group Meeting, Sorrento, Italy, September
18--21, 2005: proceedings}",
title = "{Recent advances in parallel virtual machine and
message passing interface: 12th European PVM\slash MPI
User's Group Meeting, Sorrento, Italy, September
18--21, 2005: proceedings}",
volume = "3666",
publisher = pub-SV,
address = pub-SV:adr,
pages = "xvii + 546",
year = "2005",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/11557265",
ISBN = "3-540-29009-5 (paperback)",
ISBN-13 = "978-3-540-29009-4 (paperback)",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
LCCN = "QA76.58 .E973 2005",
bibdate = "Wed Apr 5 19:31:25 MDT 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib;
z3950.loc.gov:7090/Voyager",
series = ser-LNCS,
URL = "http://springerlink.metapress.com/openurl.asp?genre=issue&issn=0302-9743&volume=3666",
acknowledgement = ack-nhfb,
meetingname = "European PVM/MPI Users' Group Meeting (12th: 2005:
Sorrento, Italy)",
subject = "Parallel computers; Congresses; Virtual computer
systems; Data transmission systems",
}
@Proceedings{IEEE:2005:IPD,
  editor = {{IEEE}},
  title = {{19th International Parallel and Distributed
    Processing Symposium: proceedings: April 4--8, 2005,
    Denver, Colorado}},
  booktitle = {{19th International Parallel and Distributed
    Processing Symposium: proceedings: April 4--8, 2005,
    Denver, Colorado}},
  publisher = pub-IEEE,
  address = pub-IEEE:adr,
  pages = {lv + 311},
  year = {2005},
  ISBN = {0-7695-2312-9},
  ISBN-13 = {978-0-7695-2312-5},
  LCCN = {????},
  bibdate = {Fri May 27 14:11:22 2005},
  bibsource = {http://www.math.utah.edu/pub/bibnet/authors/d/dongarra-jack-j.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note = {IEEE Computer Society Order Number P2312.},
  acknowledgement = ack-nhfb,
}
@Proceedings{ACM:2006:PCC,
editor = "{ACM}",
booktitle = "{Proceedings of the 3rd conference on Computing
Frontiers, May 3--5, 2006, Ischia, Italy}",
title = "{Proceedings of the 3rd conference on Computing
Frontiers, May 3--5, 2006, Ischia, Italy}",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "????",
year = "2006",
ISBN = "1-59593-302-6",
ISBN-13 = "978-1-59593-302-7",
LCCN = "????",
bibdate = "Tue Jun 20 06:45:04 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "ACM order number 104060.",
acknowledgement = ack-nhfb,
}
@Proceedings{ACM:2006:PST,
editor = "{ACM}",
booktitle = "{Proceedings of the 37th SIGCSE technical symposium on
Computer science education 2006, Houston, Texas, USA,
March 03--05, 2006}",
title = "{Proceedings of the 37th SIGCSE technical symposium on
Computer science education 2006, Houston, Texas, USA,
March 03--05, 2006}",
publisher = pub-ACM,
address = pub-ACM:adr,
pages = "????",
year = "2006",
ISBN = "1-59593-259-3",
ISBN-13 = "978-1-59593-259-4",
LCCN = "????",
bibdate = "Tue Jun 20 06:53:22 2006",
bibsource = "http://www.math.utah.edu/pub/tex/bib/pvm.bib",
note = "ACM order number 457060.",
acknowledgement = ack-nhfb,
}
@Proceedings{Mohr:2006:RAP,
  editor = {Bernd Mohr and Jesper Larsson Tr{\"a}ff and Joachim
    Worringen and Jack Dongarra},
  title = {{Recent Advances in Parallel Virtual Machine and
    Message Passing Interface: 13th European PVM\slash MPI
    User's Group Meeting Bonn, Germany, September 17--20,
    2006 Proceedings}},
  booktitle = {{Recent Advances in Parallel Virtual Machine and
    Message Passing Interface: 13th European PVM\slash MPI
    User's Group Meeting Bonn, Germany, September 17--20,
    2006 Proceedings}},
  volume = {4192},
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = {104 (est.)},
  year = {2006},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/11846802},
  ISBN = {3-540-39110-X (print), 3-540-39112-6 (e-book)},
  ISBN-13 = {978-3-540-39110-4 (print), 978-3-540-39112-8
    (e-book)},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  LCCN = {????},
  bibdate = {Wed Dec 19 15:21:40 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = ser-LNCS,
  URL = {http://www.springerlink.com/content/978-3-540-39112-8},
  acknowledgement = ack-nhfb,
}
@Proceedings{Cappello:2007:RAP,
editor = "Franck Cappello and Thomas Herault and Jack Dongarra",
booktitle = "{Recent Advances in Parallel Virtual Machine and
Message Passing Interface: 14th European PVM\slash MPI
User's Group Meeting, Paris, France, September
30--October 3, 2007. Proceedings}",
title = "{Recent Advances in Parallel Virtual Machine and
Message Passing Interface: 14th European PVM\slash MPI
User's Group Meeting, Paris, France, September
30--October 3, 2007. Proceedings}",
volume = "4757",
publisher = pub-SV,
address = pub-SV:adr,
pages = "116 (est.)",
year = "2007",
CODEN = "LNCSD9",
DOI = "https://doi.org/10.1007/978-3-540-75416-9",
ISBN = "3-540-75415-6 (print), 3-540-75416-4 (e-book)",
ISBN-13 = "978-3-540-75415-2 (print), 978-3-540-75416-9
(e-book)",
ISSN = "0302-9743 (print), 1611-3349 (electronic)",
ISSN-L = "0302-9743",
LCCN = "????",
bibdate = "Wed Dec 19 15:25:09 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncs.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCS,
URL = "http://www.springerlink.com/content/978-3-540-75416-9",
acknowledgement = ack-nhfb,
}
@Proceedings{Simos:2007:CMS,
  editor = {Theodore E. Simos and George Maroulis},
  title = {{Computation in Modern Science and Engineering:
    Proceedings of the [Fifth] International Conference on
    Computational Methods in Science and Engineering 2007
    (ICCMSE 2007), Corfu, Greece, 25--30 September 2007}},
  booktitle = {{Computation in Modern Science and Engineering:
    Proceedings of the [Fifth] International Conference on
    Computational Methods in Science and Engineering 2007
    (ICCMSE 2007), Corfu, Greece, 25--30 September 2007}},
  volume = {2A, 2B},
  publisher = pub-AIP,
  address = pub-AIP:adr,
  bookpages = {xxvi + 730 + 10 (vol. 2A)},
  pages = {xxvi + 730 + 10 (vol. 2A)},
  year = {2007},
  ISBN = {0-7354-0476-3 (set), 0-7354-0477-1 (vol. 1),
    0-7354-0478-X (vol. 2)},
  ISBN-13 = {978-0-7354-0476-2 (set), 978-0-7354-0477-9 (vol. 1),
    978-0-7354-0478-6 (vol. 2)},
  ISSN = {0094-243X (print), 1551-7616 (electronic), 1935-0465},
  LCCN = {Q183.9 .I524 2007},
  bibdate = {Thu Feb 21 14:15:15 2008},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = {AIP Conference Proceedings (\#963)},
  URL = {http://www.springer.com/physics/atoms/book/978-0-7354-0478-6},
  abstract = {All papers have been peer-reviewed. The aim of ICCMSE
    2007 is to bring together computational scientists and
    engineers from several disciplines in order to share
    methods, methodologies and ideas. The potential readers
    of these proceedings are all the scientists with
    interest in the following fields: Computational
    Mathematics, Theoretical Physics, Computational
    Physics, Theoretical Chemistry, Computational
    Chemistry, Mathematical Chemistry, Computational
    Engineering, Computational Mechanics, Computational
    Biology and Medicine, Scientific Computation, High
    Performance Computing, Parallel and Distributed
    Computing, Visualization, Problem Solving Environments,
    Software Tools, Advanced Numerical Algorithms, Modeling
    and Simulation of Complex Systems, Web-based Simulation
    and Computing, Grid-based Simulation and Computing,
    Computational Grids, and Computer Science.},
  acknowledgement = ack-nhfb,
  remark = {Two volumes.},
}
@Proceedings{Bischof:2008:AAD,
editor = "Christian H. Bischof and H. Martin B{\"u}cker and Paul
Hovland and Uwe Naumann and Jean Utke",
booktitle = "{Advances in Automatic Differentiation}",
title = "{Advances in Automatic Differentiation}",
volume = "64",
publisher = pub-SV,
address = pub-SV:adr,
bookpages = "xviii + 362",
pages = "xviii + 362",
year = "2008",
CODEN = "LNCSA6",
DOI = "https://doi.org/10.1007/978-3-540-68942-3",
ISBN = "3-540-68935-4 (print), 3-540-68942-7 (e-book)",
ISBN-13 = "978-3-540-68935-5 (print), 978-3-540-68942-3
(e-book)",
ISSN = "1439-7358",
ISSN-L = "1439-7358",
LCCN = "QA304 .I58 2008",
bibdate = "Thu Dec 20 14:35:07 MST 2012",
bibsource = "http://www.math.utah.edu/pub/tex/bib/lncse.bib;
http://www.math.utah.edu/pub/tex/bib/pvm.bib",
series = ser-LNCSE,
URL = "http://link.springer.com/book/10.1007/978-3-540-68942-3;
http://www.springerlink.com/content/978-3-540-68942-3",
acknowledgement = ack-nhfb,
remark = "The Fifth International Conference on Automatic
Differentiation held from August 11 to 15, 2008 in
Bonn, Germany, is the most recent one in a series that
began in Breckenridge, USA, in 1991 and continued in
Santa Fe, USA, in 1996, Nice, France, in 2000 and
Chicago, USA, in 2004.",
series-URL = "http://link.springer.com/bookseries/3527",
}
@Proceedings{Chapman:2008:PPM,
  editor = {Barbara Chapman and Weiming Zheng and Guang R. Gao and
    Mitsuhisa Sato and Eduard Ayguad{\'e} and Dongsheng
    Wang},
  title = {{A Practical Programming Model for the Multi-Core Era:
    3rd International Workshop on OpenMP, IWOMP 2007,
    Beijing, China, June 3--7, 2007 Proceedings}},
  booktitle = {{A Practical Programming Model for the Multi-Core Era:
    3rd International Workshop on OpenMP, IWOMP 2007,
    Beijing, China, June 3--7, 2007 Proceedings}},
  volume = {4935},
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = {184 (est.)},
  year = {2008},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-540-69303-1},
  ISBN = {3-540-69302-5 (print), 3-540-69303-3 (e-book)},
  ISBN-13 = {978-3-540-69302-4 (print), 978-3-540-69303-1
    (e-book)},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  LCCN = {????},
  bibdate = {Wed Dec 19 15:20:29 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = ser-LNCS,
  URL = {http://www.springerlink.com/content/978-3-540-69303-1},
  acknowledgement = ack-nhfb,
}
@Proceedings{Eigenmann:2008:ONE,
  editor = {Rudolf Eigenmann and Bronis R. de Supinski},
  title = {{OpenMP in a New Era of Parallelism: 4th International
    Workshop, IWOMP 2008 West Lafayette, IN, USA, May
    12--14, 2008 Proceedings}},
  booktitle = {{OpenMP in a New Era of Parallelism: 4th International
    Workshop, IWOMP 2008 West Lafayette, IN, USA, May
    12--14, 2008 Proceedings}},
  volume = {5004},
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = {190 (est.)},
  year = {2008},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-540-79561-2},
  ISBN = {3-540-79560-X (print), 3-540-79561-8 (e-book)},
  ISBN-13 = {978-3-540-79560-5 (print), 978-3-540-79561-2
    (e-book)},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  LCCN = {????},
  bibdate = {Wed Dec 19 15:21:59 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = ser-LNCS,
  URL = {http://www.springerlink.com/content/978-3-540-79561-2},
  acknowledgement = ack-nhfb,
}
@Proceedings{Lastovetsky:2008:RAP,
  editor = {Alexey Lastovetsky and Tahar Kechadi and Jack
    Dongarra},
  title = {{Recent Advances in Parallel Virtual Machine and
    Message Passing Interface: 15th European PVM\slash MPI
    Users' Group Meeting, Dublin, Ireland, September 7--10,
    2008. Proceedings}},
  booktitle = {{Recent Advances in Parallel Virtual Machine and
    Message Passing Interface: 15th European PVM\slash MPI
    Users' Group Meeting, Dublin, Ireland, September 7--10,
    2008. Proceedings}},
  volume = {5205},
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = {129 (est.)},
  year = {2008},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-540-87475-1},
  ISBN = {3-540-87474-7 (print), 3-540-87475-5 (e-book)},
  ISBN-13 = {978-3-540-87474-4 (print), 978-3-540-87475-1
    (e-book)},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  LCCN = {????},
  bibdate = {Wed Dec 19 15:17:37 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = ser-LNCS,
  URL = {http://www.springerlink.com/content/978-3-540-87475-1},
  acknowledgement = ack-nhfb,
}
@Proceedings{Mueller:2008:OSM,
  editor = {Matthias S. Mueller and Barbara M. Chapman and Bronis
    R. de Supinski and Allen D. Malony and Michael Voss},
  title = {{OpenMP Shared Memory Parallel Programming:
    International Workshops, IWOMP 2005 and IWOMP 2006,
    Eugene, OR, USA, June 1--4, 2005, Reims, France, June
    12--15, 2006. Proceedings}},
  booktitle = {{OpenMP Shared Memory Parallel Programming:
    International Workshops, IWOMP 2005 and IWOMP 2006,
    Eugene, OR, USA, June 1--4, 2005, Reims, France, June
    12--15, 2006. Proceedings}},
  volume = {4315},
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = {252 (est.)},
  year = {2008},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-540-68555-5},
  ISBN = {3-540-68554-5 (print), 3-540-68555-3 (e-book)},
  ISBN-13 = {978-3-540-68554-8 (print), 978-3-540-68555-5
    (e-book)},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  LCCN = {????},
  bibdate = {Wed Dec 19 15:24:26 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = ser-LNCS,
  URL = {http://www.springerlink.com/content/978-3-540-68555-5},
  acknowledgement = ack-nhfb,
}
@Book{Nguyen:2008:GG,
  editor = {Hubert Nguyen},
  title = {{GPU} gems 3},
  booktitle = {{GPU} gems 3},
  volume = {3},
  publisher = pub-AW,
  address = pub-AW:adr,
  pages = {l + 942},
  year = {2008},
  ISBN = {0-321-51526-9},
  ISBN-13 = {978-0-321-51526-1},
  LCCN = {T385 .G6882 2008},
  bibdate = {Thu Jul 29 13:36:54 MDT 2010},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
    http://www.math.utah.edu/pub/tex/bib/unix.bib;
    z3950.loc.gov:7090/Voyager},
  series = {GPU gems},
  URL = {http://www.loc.gov/catdir/toc/ecip0720/2007023985.html},
  acknowledgement = ack-nhfb,
  keywords = {CUDA; nVIDIA},
  subject = {Computer graphics; Real-time programming},
}
@Proceedings{Muller:2009:EOA,
  editor = {Matthias S. M{\"u}ller and Bronis R. de Supinski and
    Barbara M. Chapman},
  title = {{Evolving OpenMP in an Age of Extreme Parallelism: 5th
    International Workshop on OpenMP, IWOMP 2009 Dresden,
    Germany, June 3--5, 2009 Proceedings}},
  booktitle = {{Evolving OpenMP in an Age of Extreme Parallelism: 5th
    International Workshop on OpenMP, IWOMP 2009 Dresden,
    Germany, June 3--5, 2009 Proceedings}},
  volume = {5568},
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = {182 (est.)},
  year = {2009},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-642-02303-3},
  ISBN = {3-642-02284-7 (print), 3-642-02303-7 (e-book)},
  ISBN-13 = {978-3-642-02284-5 (print), 978-3-642-02303-3
    (e-book)},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  LCCN = {????},
  bibdate = {Wed Dec 19 15:25:20 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = ser-LNCS,
  URL = {http://www.springerlink.com/content/978-3-642-02303-3},
  acknowledgement = ack-nhfb,
}
@Proceedings{Ropo:2009:RAP,
  editor = {Matti Ropo and Jan Westerholm and Jack Dongarra},
  title = {{Recent Advances in Parallel Virtual Machine and
    Message Passing Interface: 16th European PVM\slash MPI
    Users' Group Meeting, Espoo, Finland, September 7--10,
    2009. Proceedings}},
  booktitle = {{Recent Advances in Parallel Virtual Machine and
    Message Passing Interface: 16th European PVM\slash MPI
    Users' Group Meeting, Espoo, Finland, September 7--10,
    2009. Proceedings}},
  volume = {5759},
  publisher = pub-SV,
  address = pub-SV:adr,
  pages = {142 (est.)},
  year = {2009},
  CODEN = {LNCSD9},
  DOI = {https://doi.org/10.1007/978-3-642-03770-2},
  ISBN = {3-642-03769-0 (print), 3-642-03770-4 (e-book)},
  ISBN-13 = {978-3-642-03769-6 (print), 978-3-642-03770-2
    (e-book)},
  ISSN = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L = {0302-9743},
  LCCN = {????},
  bibdate = {Wed Dec 19 15:20:58 MST 2012},
  bibsource = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
    http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series = ser-LNCS,
  URL = {http://www.springerlink.com/content/978-3-642-03770-2},
  acknowledgement = ack-nhfb,
}
@Proceedings{Tuncer:2009:PCF,
  editor          = {Ismail H. Tuncer and {\"U}lgen G{\"u}lcat and David R.
                     Emerson and Kenichi Matsuno},
  booktitle       = {{Parallel Computational Fluid Dynamics 2007:
                     Implementations and Experiences on Large Scale and Grid
                     Computing}},
  title           = {{Parallel Computational Fluid Dynamics 2007:
                     Implementations and Experiences on Large Scale and Grid
                     Computing}},
  volume          = {67},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  bookpages       = {xi + 480},
  pages           = {xi + 480},
  year            = {2009},
  CODEN           = {LNCSA6},
  DOI             = {https://doi.org/10.1007/978-3-540-92744-0},
  ISBN            = {3-540-92743-3 (print), 3-540-92744-1 (e-book)},
  ISBN-13         = {978-3-540-92743-3 (print), 978-3-540-92744-0 (e-book)},
  ISSN            = {1439-7358},
  ISSN-L          = {1439-7358},
  LCCN            = {????},
  bibdate         = {Thu Dec 20 14:35:19 MST 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lncse.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {Parallel CFD 2007 was held in Antalya, Turkey, from May
                     21 to 24, 2007.},
  series          = ser-LNCSE,
  URL             = {http://link.springer.com/book/10.1007/978-3-540-92744-0;
                     http://www.springerlink.com/content/978-3-540-92744-0},
  acknowledgement = ack-nhfb,
  series-URL      = {http://link.springer.com/bookseries/3527},
  tableofcontents = {Numerical Simulation of a Spinning Projectile Using
                     Parallel and Vectorized Unstructured Flow Solver \\
                     Development of a Framework for Parallel Simulators with
                     Various Physics and its Performance \\
                     Experience in Parallel Computational Mechanics on
                     Marenostrum \\
                     New Approaches to Modeling Rarefied Gas Flow in the
                     Slip and Transition Regime \\
                     A Parallel Scientific Software for Heterogeneous
                     Hydrogeology \\
                     Aerodynamic Shape Optimization Methods on
                     Multiprocessor Platforms \\
                     Non-Sinusoidal Path Optimization of Dual Airfoils
                     Flapping in a Biplane Configuration \\
                     Parallel Computation of $3$-D Viscous Flows on Hybrid
                     Grids \\
                     Implementation of Parallel DSMC Method to Adiabatic
                     Piston Problem \\
                     Efficient Parallel Algorithm for Multiconstrained
                     Optimization of Wing-Body Configurations \\
                     Parallel Three Dimensional Direct Simulation Monte
                     Carlo for Simulating Micro Flows \\
                     A Study on the Prediction of the Aerodynamic
                     Characteristics of an Orbital Block of a Launch Vehicle
                     in the Rarefied Flow Regime Using the DSMC Approach and
                     the Parallel Computation \\
                     Parallel Solution of a $3$-D Mixed Convection Problem \\
                     Computation of Hypersonic Flow of a Diatomic Gas in
                     Rotational Non-Equilibrium Past a Blunt Body Using the
                     Generalized Boltzmann Equation \\
                     Application of Parallel Processing to Numerical
                     Modeling of Two-Phase Deflagration-to-Detonation (DDT)
                     Phenomenon \\
                     Highly Scalable Multiphysics Computational Framework
                     for Propulsive Energetic Systems \\
                     A Parallel Aitken-Additive Schwarz Waveform Relaxation
                     Method for Parabolic Problems \\
                     Parallel Computation of Incompressible Flows Driven by
                     Moving Multiple Obstacles Using a New Moving
                     Embedded-Grid Method \\
                     Parallel Computing on Network of Windows Based PCs \\
                     Parallel Computations of Droplet Oscillations \\
                     Cyclic Distribution of Pipelined Parallel Deferred
                     Correction Method for ODE/DAE \\
                     Hybrid Parallelization Techniques for Lattice Boltzmann
                     Free Surface Flows \\
                     Flow-Structure Interaction and Flow Analysis of
                     Hydraulic Machinery on a Computational Grid \\
                     Parallel Computation of Incompressible Flow Using
                     Building-Cube Method \\
                     $3$D Model of Pollution Distribution in City Air and
                     its Parallel Realization \\
                     Parallel Navier-Stokes Solution of a Wing-Flap
                     Configuration on Structured Multi-Block Oversetting
                     Grids \\
                     Parallel Navier-Stokes Solutions of NASA $65^\circ$
                     Delta-Wing \\
                     Parallel Turbulent Navier-Stokes Solutions of Wing
                     alone Geometries for Drag Prediction \\
                     Adaptive Aitken-Schwarz for Darcy $3$D Flow on
                     Heterogeneous Media \\
                     Numerical Simulation of Compressible Flow using
                     Three-Dimensional Unstructured Added/Eliminated Grid
                     Method \\
                     Technology of Parallelization for $2$D and $3$D CFD/CAA
                     Codes based on High-Accuracy Explicit Methods on
                     Unstructured Meshes \\
                     Separate Treatment of Momentum and Heat Flows in
                     Parallel Environment \\
                     DNS of Turbulent Natural Convection Flows on the Mare
                     Nostrum Supercomputer \\
                     Termo Fluids: A New Parallel Unstructured CFD Code for
                     the Simulation of Turbulent Industrial Problems on Low
                     Cost PC Cluster},
}
@Proceedings{Chaudhuri:2010:PIC,
  editor          = {Pranay Chaudhuri and Sukumar Ghosh and Raj Kumar Buyya
                     and Jian-Nong Cao and Deepak Dahiya},
  booktitle       = {{Proceedings of the 2010 1st International Conference on
                     Parallel Distributed and Grid Computing (PDGC), Jaypee
                     University of Information Technology Waknaghat, Solan,
                     HP, India, 28--30 October, 2010}},
  title           = {{Proceedings of the 2010 1st International Conference on
                     Parallel Distributed and Grid Computing (PDGC), Jaypee
                     University of Information Technology Waknaghat, Solan,
                     HP, India, 28--30 October, 2010}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  bookpages       = {xiii + 382},
  pages           = {xiii + 382},
  year            = {2010},
  ISBN            = {1-4244-7675-5},
  ISBN-13         = {978-1-4244-7675-6},
  LCCN            = {????},
  bibdate         = {Thu Apr 21 10:51:00 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/cryptography2010.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Keller:2010:RAM,
  editor          = {Rainer Keller and Edgar Gabriel and Michael Resch and
                     Jack Dongarra},
  booktitle       = {{Recent Advances in the Message Passing Interface: 17th
                     European MPI Users' Group Meeting, EuroMPI 2010,
                     Stuttgart, Germany, September 12--15, 2010.
                     Proceedings}},
  title           = {{Recent Advances in the Message Passing Interface: 17th
                     European MPI Users' Group Meeting, EuroMPI 2010,
                     Stuttgart, Germany, September 12--15, 2010.
                     Proceedings}},
  volume          = {6305},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = {197 (est.)},
  year            = {2010},
  CODEN           = {LNCSD9},
  DOI             = {https://doi.org/10.1007/978-3-642-15646-5},
  ISBN            = {3-642-15645-2 (print), 3-642-15646-0 (e-book)},
  ISBN-13         = {978-3-642-15645-8 (print), 978-3-642-15646-5 (e-book)},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  LCCN            = {????},
  bibdate         = {Wed Dec 19 15:24:14 MST 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = ser-LNCS,
  URL             = {http://www.springerlink.com/content/978-3-642-15646-5},
  acknowledgement = ack-nhfb,
}
@Proceedings{Sato:2010:BLL,
  editor          = {Mitsuhisa Sato and Toshihiro Hanawa and Matthias S.
                     M{\"u}ller and Barbara M. Chapman and Bronis R. de
                     Supinski},
  booktitle       = {{Beyond Loop Level Parallelism in OpenMP: Accelerators,
                     Tasking and More: 6th International Workshop on OpenMP,
                     IWOMP 2010, Tsukuba, Japan, June 14--16, 2010
                     Proceedings}},
  title           = {{Beyond Loop Level Parallelism in OpenMP: Accelerators,
                     Tasking and More: 6th International Workshop on OpenMP,
                     IWOMP 2010, Tsukuba, Japan, June 14--16, 2010
                     Proceedings}},
  volume          = {6132},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = {173 (est.)},
  year            = {2010},
  CODEN           = {LNCSD9},
  DOI             = {https://doi.org/10.1007/978-3-642-13217-9},
  ISBN            = {3-642-13216-2 (print), 3-642-13217-0 (e-book)},
  ISBN-13         = {978-3-642-13216-2 (print), 978-3-642-13217-9 (e-book)},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  LCCN            = {????},
  bibdate         = {Wed Dec 19 15:20:26 MST 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = ser-LNCS,
  URL             = {http://www.springerlink.com/content/978-3-642-13217-9},
  acknowledgement = ack-nhfb,
}
@Proceedings{ACM:2011:SSP,
  editor          = {{ACM}},
  booktitle       = {{SC '11 State of the Practice Reports}},
  title           = {{SC '11 State of the Practice Reports}},
  publisher       = pub-ACM,
  address         = pub-ACM:adr,
  year            = {2011},
  ISBN            = {1-4503-1139-3},
  ISBN-13         = {978-1-4503-1139-7},
  LCCN            = {????},
  bibdate         = {Fri Dec 16 11:20:09 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/supercomputing2011.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Chapman:2011:OPE,
  editor          = {Barbara M. Chapman and William D. Gropp and Kalyan
                     Kumaran and Matthias S. M{\"u}ller},
  booktitle       = {{OpenMP in the Petascale Era: 7th International Workshop
                     on OpenMP, IWOMP 2011, Chicago, IL, USA, June 13--15,
                     2011. Proceedings}},
  title           = {{OpenMP in the Petascale Era: 7th International Workshop
                     on OpenMP, IWOMP 2011, Chicago, IL, USA, June 13--15,
                     2011. Proceedings}},
  volume          = {6665},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = {178 (est.)},
  year            = {2011},
  CODEN           = {LNCSD9},
  DOI             = {https://doi.org/10.1007/978-3-642-21487-5},
  ISBN            = {3-642-21486-X (print), 3-642-21487-8 (e-book)},
  ISBN-13         = {978-3-642-21486-8 (print), 978-3-642-21487-5 (e-book)},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  LCCN            = {????},
  bibdate         = {Wed Dec 19 15:23:23 MST 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = ser-LNCS,
  URL             = {http://www.springerlink.com/content/978-3-642-21487-5},
  acknowledgement = ack-nhfb,
}
@Proceedings{Cotronis:2011:RAM,
  editor          = {Yiannis Cotronis and Anthony Danalis and Dimitrios S.
                     Nikolopoulos and Jack Dongarra},
  booktitle       = {{Recent Advances in the Message Passing Interface: 18th
                     European MPI Users' Group Meeting, EuroMPI 2011,
                     Santorini, Greece, September 18--21, 2011.
                     Proceedings}},
  title           = {{Recent Advances in the Message Passing Interface: 18th
                     European MPI Users' Group Meeting, EuroMPI 2011,
                     Santorini, Greece, September 18--21, 2011.
                     Proceedings}},
  volume          = {6960},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = {177 (est.)},
  year            = {2011},
  CODEN           = {LNCSD9},
  DOI             = {https://doi.org/10.1007/978-3-642-24449-0},
  ISBN            = {3-642-24448-3 (print), 3-642-24449-1 (e-book)},
  ISBN-13         = {978-3-642-24448-3 (print), 978-3-642-24449-0 (e-book)},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  LCCN            = {????},
  bibdate         = {Wed Dec 19 15:21:14 MST 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = ser-LNCS,
  URL             = {http://www.springerlink.com/content/978-3-642-24449-0},
  acknowledgement = ack-nhfb,
}
@Proceedings{Lathrop:2011:SPI,
  editor          = {Scott Lathrop and Jim Costa and William Kramer},
  booktitle       = {{SC'11: Proceedings of 2011 International Conference for
                     High Performance Computing, Networking, Storage and
                     Analysis, Seattle, WA, November 12--18 2011}},
  title           = {{SC'11: Proceedings of 2011 International Conference for
                     High Performance Computing, Networking, Storage and
                     Analysis, Seattle, WA, November 12--18 2011}},
  publisher       = pub-ACM # " and " # pub-IEEE,
  address         = pub-ACM:adr # " and " # pub-IEEE:adr,
  pages           = {????},
  year            = {2011},
  ISBN            = {1-4503-0771-X},
  ISBN-13         = {978-1-4503-0771-0},
  LCCN            = {????},
  bibdate         = {Fri Dec 16 11:11:35 2011},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/supercomputing2011.bib},
  acknowledgement = ack-nhfb,
  xxeditor        = {{ACM}},
}
@Proceedings{Tromeur-Dervout:2011:PCF,
  editor          = {Damien Tromeur-Dervout and Gunther Brenner and David R.
                     Emerson and Jocelyne Erhel},
  booktitle       = {{Parallel Computational Fluid Dynamics 2008: Parallel
                     Numerical Methods, Software Development and
                     Applications}},
  title           = {{Parallel Computational Fluid Dynamics 2008: Parallel
                     Numerical Methods, Software Development and
                     Applications}},
  volume          = {74},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  bookpages       = {xi + 432},
  pages           = {xi + 432},
  year            = {2011},
  CODEN           = {LNCSA6},
  DOI             = {https://doi.org/10.1007/978-3-642-14438-7},
  ISBN            = {3-642-14437-3 (print), 3-642-14438-1 (e-book)},
  ISBN-13         = {978-3-642-14437-0 (print), 978-3-642-14438-7 (e-book)},
  ISSN            = {1439-7358},
  ISSN-L          = {1439-7358},
  LCCN            = {????},
  bibdate         = {Thu Dec 20 14:35:30 MST 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lncse.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  note            = {Proceedings of the twentieth meeting, Parallel CFD 2008,
                     held May 19--22, 2008 in Lyon, France.},
  series          = ser-LNCSE,
  URL             = {http://link.springer.com/book/10.1007/978-3-642-14438-7;
                     http://www.springerlink.com/content/978-3-642-14438-7},
  acknowledgement = ack-nhfb,
  series-URL      = {http://link.springer.com/bookseries/3527},
  tableofcontents = {Preface \\
                     Contents \\
                     Part I: Invited speakers \\
                     Part II: Optimisation in Aerodynamics Design \\
                     Part III: Grid methods \\
                     Part IV: Boundary methods \\
                     Part V: High Order methods \\
                     Part VI: Parallel Algorithms and Solvers \\
                     Part VII: Lattice Boltzmann and SPH Methods \\
                     Part VIII: Software Framework and Component
                     Architecture \\
                     Part IX: Parallel Performance \\
                     Part X: Environment and biofluids applications \\
                     Part XI: General fluid \\
                     Editorial Policy},
}
@Proceedings{Chapman:2012:OHW,
  editor          = {Barbara M. Chapman and Federico Massaioli and Matthias
                     S. M{\"u}ller and Marco Rorro},
  booktitle       = {{OpenMP in a Heterogeneous World: 8th International
                     Workshop on OpenMP, IWOMP 2012, Rome, Italy, June
                     11--13, 2012. Proceedings}},
  title           = {{OpenMP in a Heterogeneous World: 8th International
                     Workshop on OpenMP, IWOMP 2012, Rome, Italy, June
                     11--13, 2012. Proceedings}},
  volume          = {7312},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = {257 (est.)},
  year            = {2012},
  CODEN           = {LNCSD9},
  DOI             = {https://doi.org/10.1007/978-3-642-30961-8},
  ISBN            = {3-642-30960-7 (print), 3-642-30961-5 (e-book)},
  ISBN-13         = {978-3-642-30960-1 (print), 978-3-642-30961-8 (e-book)},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  LCCN            = {????},
  bibdate         = {Wed Dec 19 15:19:49 MST 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = ser-LNCS,
  URL             = {http://www.springerlink.com/content/978-3-642-30961-8},
  acknowledgement = ack-nhfb,
}
@Proceedings{Hollingsworth:2012:SPI,
  editor          = {Jeffrey Hollingsworth},
  booktitle       = {{SC '12: Proceedings of the International Conference on
                     High Performance Computing, Networking, Storage and
                     Analysis, Salt Lake Convention Center, Salt Lake City,
                     UT, USA, November 10--16, 2012}},
  title           = {{SC '12: Proceedings of the International Conference on
                     High Performance Computing, Networking, Storage and
                     Analysis, Salt Lake Convention Center, Salt Lake City,
                     UT, USA, November 10--16, 2012}},
  publisher       = pub-IEEE,
  address         = pub-IEEE:adr,
  year            = {2012},
  ISBN            = {1-4673-0804-8},
  ISBN-13         = {978-1-4673-0804-5},
  bibdate         = {Thu Nov 15 07:35:55 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/pvm.bib;
                     http://www.math.utah.edu/pub/tex/bib/supercomputing2012.bib},
  acknowledgement = ack-nhfb,
}
@Proceedings{Traff:2012:RAM,
  editor          = {Jesper Larsson Tr{\"a}ff and Siegfried Benkner and Jack
                     J. Dongarra},
  booktitle       = {{Recent Advances in the Message Passing Interface: 19th
                     European MPI Users' Group Meeting, EuroMPI 2012,
                     Vienna, Austria, September 23--26, 2012. Proceedings}},
  title           = {{Recent Advances in the Message Passing Interface: 19th
                     European MPI Users' Group Meeting, EuroMPI 2012,
                     Vienna, Austria, September 23--26, 2012. Proceedings}},
  volume          = {7490},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = {162 (est.)},
  year            = {2012},
  CODEN           = {LNCSD9},
  DOI             = {https://doi.org/10.1007/978-3-642-33518-1},
  ISBN            = {3-642-33517-9 (print), 3-642-33518-7 (e-book)},
  ISBN-13         = {978-3-642-33517-4 (print), 978-3-642-33518-1 (e-book)},
  ISSN            = {0302-9743 (print), 1611-3349 (electronic)},
  ISSN-L          = {0302-9743},
  LCCN            = {????},
  bibdate         = {Wed Dec 19 15:23:42 MST 2012},
  bibsource       = {http://www.math.utah.edu/pub/tex/bib/lncs.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = ser-LNCS,
  URL             = {http://www.springerlink.com/content/978-3-642-33518-1},
  acknowledgement = ack-nhfb,
}
@Proceedings{Erhel:2014:DDM,
  editor          = {Jocelyne Erhel and Martin J. Gander and Laurence Halpern
                     and G{\'e}raldine Pichot and Taoufik Sassi and Olof
                     Widlund},
  booktitle       = {Domain Decomposition Methods in Science and Engineering
                     {XXI}},
  title           = {Domain Decomposition Methods in Science and Engineering
                     {XXI}},
  volume          = {98},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = {xx + 973 + 220},
  year            = {2014},
  DOI             = {https://doi.org/10.1007/978-3-319-05789-7},
  ISBN            = {3-319-05788-X (paperback), 3-319-05789-8 (e-book)},
  ISBN-13         = {978-3-319-05788-0 (paperback), 978-3-319-05789-7
                     (e-book)},
  ISSN            = {1439-7358 (print), 2197-7100 (electronic)},
  ISSN-L          = {1439-7358},
  LCCN            = {QA71-90},
  bibdate         = {Sat Dec 12 10:43:35 MST 2015},
  bibsource       = {fsz3950.oclc.org:210/WorldCat;
                     http://www.math.utah.edu/pub/tex/bib/lncse.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = ser-LNCSE,
  URL             = {http://link.springer.com/book/10.1007/978-3-319-05789-7},
  abstract        = {This volume contains a selection of papers presented at
                     the 21st international conference on domain
                     decomposition methods in science and engineering held
                     in Rennes, France, June 25--29, 2012. Domain
                     decomposition is an active and interdisciplinary
                     research discipline, focusing on the development,
                     analysis and implementation of numerical methods for
                     massively parallel computers. Domain decomposition
                     methods are among the most efficient solvers for large
                     scale applications in science and engineering. They are
                     based on a solid theoretical foundation and shown to be
                     scalable for many important applications. Domain
                     decomposition techniques can also naturally take into
                     account multiscale phenomena. This book contains the
                     most recent results in this important field of
                     research, both mathematically and algorithmically and
                     allows the reader to get an overview of this exciting
                     branch of numerical analysis and scientific
                     computing.},
  acknowledgement = ack-nhfb,
  tableofcontents = {Preface \\
                     Part I: Plenary Presentations \\
                     Part II: Minisymposia \\
                     Part III: Contributed Presentations},
}
@Book{Mehl:2015:RTC,
  editor          = {Miriam Mehl and Manfred Bischoff and Michael
                     Sch{\"a}fer},
  booktitle       = {Recent Trends in Computational Engineering ---
                     {CE2014}: Optimization, Uncertainty, Parallel
                     Algorithms, Coupled and Complex Problems},
  title           = {Recent Trends in Computational Engineering ---
                     {CE2014}: Optimization, Uncertainty, Parallel
                     Algorithms, Coupled and Complex Problems},
  volume          = {105},
  publisher       = pub-SV,
  address         = pub-SV:adr,
  pages           = {317 (est.)},
  year            = {2015},
  DOI             = {https://doi.org/10.1007/978-3-319-22997-3},
  ISBN            = {3-319-22996-6, 3-319-22997-4 (e-book)},
  ISBN-13         = {978-3-319-22996-6, 978-3-319-22997-3 (e-book)},
  ISSN            = {1439-7358 (print), 2197-7100 (electronic)},
  ISSN-L          = {1439-7358},
  LCCN            = {QA71-90; TA329},
  bibdate         = {Sat Dec 12 10:43:43 MST 2015},
  bibsource       = {fsz3950.oclc.org:210/WorldCat;
                     http://www.math.utah.edu/pub/tex/bib/lncse.bib;
                     http://www.math.utah.edu/pub/tex/bib/matlab.bib;
                     http://www.math.utah.edu/pub/tex/bib/pvm.bib},
  series          = ser-LNCSE,
  URL             = {http://www.springerlink.com/content/978-3-319-22997-3},
  acknowledgement = ack-nhfb,
  meetingname     = {International Workshop on Computational Engineering
                     (3rd : 2014 : Stuttgart, Germany)},
  subject         = {Engineering mathematics; Congresses; TECHNOLOGY and
                     ENGINEERING / Engineering (General); TECHNOLOGY and
                     ENGINEERING / Reference; Engineering mathematics.},
}