%%% LaTeX preamble emitted ahead of the bibliography: it inputs
%%% bibnames.sty and supplies fallback definitions of \circled, \reg,
%%% and \TM, each guarded so that an existing definition is kept.
@Preamble{
"\input bibnames.sty"
# "\ifx \undefined \circled \def \circled #1{(#1)}\fi"
# "\ifx \undefined \reg \def \reg {\circled{R}}\fi"
# "\ifx \undefined \TM \def \TM {${}^{\sc TM}$} \fi"
}
%%% Contact block that the entries reference through their
%%% acknowledgement field.
@String{ack-nhfb = "Nelson H. F. Beebe,
                    University of Utah,
                    Department of Mathematics, 110 LCB,
                    155 S 1400 E RM 233,
                    Salt Lake City, UT 84112-0090, USA,
                    Tel: +1 801 581 5254,
                    FAX: +1 801 581 4148,
                    e-mail: \path|beebe@math.utah.edu|,
                            \path|beebe@acm.org|,
                            \path|beebe@computer.org| (Internet),
                    URL: \path|https://www.math.utah.edu/~beebe/|"}
%%% Journal abbreviation used by the journal field of every entry.
@String{j-TOCS                  = "ACM Transactions on Computer Systems"}

%%% Publisher name and publisher address abbreviations.
@String{pub-ACM                 = "ACM Press"}
@String{pub-ACM:adr             = "New York, NY 10036, USA"}
%%% TOCS 1(1), Feb 1983: editor's introduction.
@Article{Jones:1983:EI,
  author =       "Anita K. Jones",
  title =        "{Editor}'s Introduction",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "1--2",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}
%%% TOCS 1(1), Feb 1983: atomic actions on decentralized data.
@Article{Reed:1983:IAA,
  author =       "David P. Reed",
  title =        "Implementing Atomic Actions on Decentralized Data",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "3--23",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}
%%% TOCS 1(1), Feb 1983: measured cache performance of the VAX-11/780.
@Article{Clark:1983:CPV,
  author =       "Douglas W. Clark",
  key =          "Clark",
  title =        "Cache Performance in the {VAX-11\slash 780}",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "24--37",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Oct 12 13:58:27 1984",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Math/sparse.linear.systems.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/storage.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "The performance of memory caches is usually studied
                 through trace-driven simulation. This approach has
                 several drawbacks. Notably, it excludes realistic
                 multiprogramming, operating system, and I/O activity.
                 In this paper, cache performance is studied by direct
                 measurement of the hardware. A hardware monitor was
                 attached to a VAX-11/780 computer, whose cache was then
                 measured during normal use. A reproducible synthetic
                 timesharing workload was also run. This paper reports
                 measurements including the hit ratios of data and
                 instruction references, the rate of cache invalidations
                 by I/O, and the amount of waiting time due to cache
                 misses. Additional measurements were made with half the
                 cache disabled, and with the entire cache disabled.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}
%%% TOCS 1(1), Feb 1983: cryptographically strong pseudorandom sequences.
@Article{Shamir:1983:GCS,
  author =       "Adi Shamir",
  title =        "On the Generation of Cryptographically Strong
                 Pseudorandom Sequences",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "38--44",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}
%%% TOCS 1(1), Feb 1983: ports on the Intel 432.
@Article{Cox:1983:ICP,
  author =       "George W. Cox and William M. Corwin and Konrad K. Lai
                 and Fred J. Pollack",
  title =        "Interprocess Communication and Processor Dispatching
                 on the {Intel 432}",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "45--66",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Tue Sep 9 09:46:02 1986",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/os.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Parallel/Multi.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "A unified facility for interprocess communication and
                 processor dispatching on the Intel 432 is described.
                 The facility is based on a queuing and binding
                 mechanism called a port. The goals and motivations for
                 ports, both abstract and implementation views of them,
                 and their absolute and comparative performance are
                 described.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
  owner =        "seufert",
}
%%% TOCS 1(1), Feb 1983: state-dependent queueing networks.  A
%%% corrigendum is recorded under the key Sauer:1983:CCA.
@Article{Sauer:1983:CAS,
  author =       "Charles H. Sauer",
  title =        "Computational Algorithms for State-Dependent Queueing
                 Networks",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "67--92",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Distributed/QLD/1983.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "See corrigendum \cite{Sauer:1983:CCA}.",
  acknowledgement = ack-nhfb,
  annote =       "\ldots{} in this paper the author limits the material
                 reviewed to three forms of state dependency in queueing
                 networks which have the product form. The major part of
                 the paper addresses state-dependent routing, in which the
                 probability of entering a queue of a subnetwork depends
                 upon the quotient of a linear function of the number of
                 customers in that queue and another linear function of
                 the total number of customers in the subnetwork \ldots{}",
  country =      "USA",
  date =         "28/09/84",
  descriptors =  "Queueing network; method; state dependent queueing;
                 MVA; CONVOLUTION ALGORITHM",
  enum =         "2690",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
  language =     "English",
  location =     "RWTH-AC-DFV: TELL",
  references =   "26",
  revision =     "21/04/91",
}
%%% TOCS 1(1), Feb 1983: information for authors.
@Article{Anonymous:1983:IA,
  author =       "Anonymous",
  title =        "Information for Authors",
  journal =      j-TOCS,
  volume =       "1",
  number =       "1",
  pages =        "93--95",
  month =        feb,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(2), May 1983: preface to the special issue.
@Article{Schwetman:1983:PSI,
  author =       "Herbert D. Schwetman",
  title =        "Preface to the Special Issue",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "97--98",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(2), May 1983: performance bounds for queueing networks.
@Article{Eager:1983:PBH,
  author =       "Derek L. Eager and Kenneth C. Sevcik",
  title =        "Performance Bound Hierarchies for Queueing Networks",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "99--115",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(2), May 1983: aggregate server method for serialization
%%% delays.
@Article{Agrawal:1983:ASM,
  author =       "Subhash C. Agrawal and Jeffrey P. Buzen",
  title =        "The Aggregate Server Method for Analyzing
                 Serialization Delays in Computer Systems",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "116--143",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Distributed/QLD/1982.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  annote =       "An approximate, iterative method is presented to
                 estimate the delays caused by programs waiting to enter
                 critical sections and other software control structures
                 in which mutual exclusion is enforced (i.e.,
                 one-at-a-time or serialized processing). Some common
                 sources of such serialization delays include routines
                 that perform resource allocation, modify internal data
                 structures, or update external files and
                 databases \ldots{}",
  country =      "USA",
  date =         "02/12/83",
  descriptors =  "Queueing approximation; process management; aggregate
                 server method; serialization; resource allocation",
  enum =         "38",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  language =     "English",
  location =     "RWTH-AC-DFV: Bibl.",
  references =   "0",
  revision =     "19/03/92",
}
%%% TOCS 1(2), May 1983: distributed deadlock detection.
@Article{Chandy:1983:DDD,
  author =       "K. Mani Chandy and Jayadev Misra and Laura M. Haas",
  title =        "Distributed Deadlock Detection",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "144--156",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Distributed/QLD/1983.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Misc/Discrete.event.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  annote =       "Distributed deadlock models are presented for resource
                 and communication deadlocks. Simple distributed
                 algorithms for detection of these deadlocks are given.
                 We show that all true deadlocks are detected and that
                 no false deadlocks are reported. In our algorithms, no
                 process maintains global information; all messages have
                 an identical short length. The algorithms can be
                 applied in distributed database and other message
                 communication systems.",
  country =      "USA",
  date =         "00/00/00",
  descriptors =  "DISTRIBUTED SIMULATION; COMPUTER NETWORK; DEADLOCK",
  enum =         "8087",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
  language =     "English",
  location =     "UniS-IND-DS_C",
  references =   "15",
  revision =     "19/10/93",
  xxnote =       "Author order follows the published article (Chandy,
                 Misra, Haas).",
}
%%% TOCS 1(2), May 1983: VLSI layout of a pipelined Dadda multiplier.
@Article{Cappello:1983:VLP,
  author =       "Peter R. Cappello and Kenneth Steiglitz",
  title =        "A {VLSI} Layout for a Pipelined {Dadda} Multiplier",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "157--174",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Math/computer.arithmetic.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "Reprinted in E. E. Swartzlander, {\em Computer
                 Arithmetic}, Vol. 2, IEEE Computer Society Press
                 Tutorial, Los Alamitos, CA, 1990.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "multiplication",
}
%%% TOCS 1(2), May 1983: exchanging secret keys.
@Article{Blum:1983:HES,
  author =       "Manuel Blum",
  title =        "How to Exchange (Secret) Keys",
  journal =      j-TOCS,
  volume =       "1",
  number =       "2",
  pages =        "175--193",
  month =        may,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Theory/crypto.security.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "Previously published in ACM STOC '83 proceedings,
                 pages 440--447.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(3), Aug 1983: the PACS parallel microprocessor array.
@Article{Hoshino:1983:PPM,
  author =       "Tsutomu Hoshino and Toshio Kawai and Tomonori
                 Shirakawa and Junichi Higashino and Akira Yamaoka and
                 Hachidai Ito and Takashi Sato and Kazuo Sawada",
  title =        "{PACS}: a Parallel Microprocessor Array for Scientific
                 Calculations",
  journal =      j-TOCS,
  volume =       "1",
  number =       "3",
  pages =        "195--221",
  month =        aug,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Parallel/ovr.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(3), Aug 1983: fail-stop processors.
@Article{Schlichting:1983:FSP,
  author =       "Richard D. Schlichting and Fred B. Schneider",
  title =        "Fail-Stop Processors: An Approach to Designing
                 Fault-Tolerant Computing Systems",
  journal =      j-TOCS,
  volume =       "1",
  number =       "3",
  pages =        "222--238",
  month =        aug,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Distributed/distfs.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/SE/dependability.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}
%%% TOCS 1(3), Aug 1983: cryptographic access control in a hierarchy.
@Article{Akl:1983:CSP,
  author =       "Selim G. Akl and Peter D. Taylor",
  title =        "Cryptographic Solution to a Problem of Access Control
                 in a Hierarchy",
  journal =      j-TOCS,
  volume =       "1",
  number =       "3",
  pages =        "239--248",
  month =        aug,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(3), Aug 1983: key distribution using event markers.
@Article{Bauer:1983:KDP,
  author =       "R. K. Bauer and T. A. Berson and R. J. Feiertag",
  title =        "A Key Distribution Protocol Using Event Markers",
  journal =      j-TOCS,
  volume =       "1",
  number =       "3",
  pages =        "249--255",
  month =        aug,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Misc/misc.1.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(3), Aug 1983: shared resource matrix methodology.
@Article{Kemmerer:1983:SRM,
  author =       "Richard A. Kemmerer",
  title =        "Shared Resource Matrix Methodology: An Approach to
                 Identifying Storage and Timing Channels",
  journal =      j-TOCS,
  volume =       "1",
  number =       "3",
  pages =        "256--277",
  month =        aug,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(4), Nov 1983: preface to the special issue.
@Article{Jones:1983:PSI,
  author =       "Anita K. Jones",
  title =        "Preface to Special Issue",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "279--280",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(4), Nov 1983: transient behavior of cache memories.
@Article{Strecker:1983:TBC,
  author =       "William D. Strecker",
  title =        "Transient Behavior of Cache Memories",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "281--293",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(4), Nov 1983: optimal routing in closed queueing networks.
@Article{Kobayashi:1983:ORC,
  author =       "Hiroshi Kobayashi and Mario Gerla",
  title =        "Optimal Routing in Closed Queueing Networks",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "294--310",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Distributed/QLD/1983.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  annote =       "\ldots{} This paper addresses the problem of obtaining
                 the set of routing probabilities that will minimize
                 response time, or alternatively maximize the
                 throughput. An algorithm, called the flow deviation
                 (FD) algorithm, is already known for obtaining the
                 optimal routing probabilities for open queueing network
                 models \ldots{}",
  country =      "USA",
  date =         "28/11/84",
  descriptors =  "Closed queueing network; routing algorithm",
  enum =         "1726",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  language =     "English",
  location =     "RWTH-AC-DFV: Bibl.",
  references =   "0",
  revision =     "21/04/91",
}
%%% TOCS 1(4), Nov 1983: bounding packet lifetimes.
@Article{Sloan:1983:MEB,
  author =       "Lansing Sloan",
  title =        "Mechanisms that Enforce Bounds on Packet Lifetimes",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "311--330",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(4), Nov 1983: HDLC protocol specification and verification.
@Article{Shankar:1983:HPS,
  author =       "A. Udaya Shankar and Simon S. Lam",
  title =        "An {HDLC} Protocol Specification and Its Verification
                 Using Image Protocols",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "331--368",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 1(4), Nov 1983: corrigendum to the article recorded under the
%%% key Sauer:1983:CAS; the title repeats that article's title.
@Article{Sauer:1983:CCA,
  author =       "Charles H. Sauer",
  title =        "Corrigendum: Computational Algorithms for
                 State-Dependent Queueing Networks",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "369--369",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Distributed/QLD/1983.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "See \cite{Sauer:1983:CAS}.",
  acknowledgement = ack-nhfb,
  country =      "USA",
  date =         "13/05/93",
  descriptors =  "Queueing network; product form; analysis",
  enum =         "7840",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  language =     "English",
  location =     "SEL: Wi",
  references =   "0",
  revision =     "16/01/94",
}
%%% TOCS 1(4), Nov 1983: index.  The citation key keeps its existing
%%% 1984 label so that current citations continue to resolve; the year
%%% field matches the other volume 1, number 4 entries.
@Article{Anonymous:1984:I,
  author =       "Anonymous",
  title =        "Index",
  journal =      j-TOCS,
  volume =       "1",
  number =       "4",
  pages =        "370--371",
  month =        nov,
  year =         "1983",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 2(1), Feb 1984: preface to the special issue.  The month
%%% matches the other volume 2, number 1 entries.
@Article{Jones:1984:PSI,
  author =       "Anita K. Jones",
  title =        "Preface to Special Issue",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "1--1",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 2(1), Feb 1984: operational experience with Grapevine.
@Article{Schroeder:1984:EGG,
  author =       "Michael D. Schroeder and Andrew D. Birrell and Roger
                 M. Needham",
  title =        "Experience with {Grapevine}: The Growth of a
                 Distributed System",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "3--23",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 15 14:49:51 1987",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Database/Wiederhold/1984.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Distributed/Danzig.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/os.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Grapevine is a distributed, replicated system that
                 provides message delivery, naming, authentication,
                 resource location, and access control services in an
                 internet of computers. The system, described in a
                 previous paper [1], was designed and implemented
                 several years ago. We now have had operational
                 experience with the system under substantial load. In
                 this paper we report on what we have learned from using
                 Grapevine.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "Design; experimentation; Grapevine; reliability; TOCS
                 operating systems distributed systems database
                 systems",
  owner =        "manning",
}
%%% TOCS 2(1), Feb 1984: the R* distributed database manager (the
%%% keywords field spells the system name as Rstar).
@Article{Lindsay:1984:CCR,
  author =       "Bruce G. Lindsay and Laura M. Haas and C. Mohan and
                 Paul F. Wilms and Robert A. Yost",
  title =        "Computation and Communication in {R*}: a Distributed
                 Database Manager",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "24--38",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Database/Wiederhold/1984.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "Also published in/as: SOSP 9, Bretton Woods, Oct.
                 1983.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "Rstar; TOCS",
}
%%% TOCS 2(1), Feb 1984: implementing remote procedure calls.  The
%%% checked value is a quoted literal because no string abbreviation
%%% named yes is defined above this entry.
@Article{Birrell:1984:IRP,
  author =       "Andrew D. Birrell and Bruce Jay Nelson",
  key =          "Birrell \& Nelson",
  title =        "Implementing Remote Procedure Calls",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "39--59",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 15 14:59:58 1987",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Compiler/bcp.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Distributed/networks.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Misc/misc.1.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/os.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/SE/dependability.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Remote procedure calls (RPC) appear to be a useful
                 paradigm for providing communication across a network
                 between programs written in a high-level language. This
                 paper describes a package providing a remote procedure
                 call facility, the options that face the designer of
                 such a package, and the decisions we made. We describe
                 the overall structure of our RPC mechanism, our
                 facilities for binding RPC clients, the transport
                 level, communication protocol, and some performance
                 measurements. We include descriptions of some
                 optimizations used to achieve high performance and to
                 minimize the load on server machines that have many
                 clients.",
  acknowledgement = ack-nhfb,
  checked =      "yes",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "Design; distributed naming and binding;
                 experimentation; inter-process communication;
                 performance; performance of communication protocols;
                 remote procedure calls; RPC, Cedar; RPC, transport
                 layer protocol; security; TOCS; transport layer
                 protocols",
  memos =        "The idea of RPC was first suggested by J. E. White in
                 a paper entitled ``A high-level framework for
                 network-based resource sharing'' in the Proceedings of
                 the National Computer Conference in June 1976. The
                 implementation of RPC described in the paper is the one
                 from the {\em Cedar\/} project at Xerox.",
  owner =        "manning",
}
%%% TOCS 2(1), Feb 1984: content-induced transaction overlap.
@Article{Berkovich:1984:CCT,
  author =       "Simon Y. Berkovich and Colleen Roe Wilson",
  title =        "A Computer Communication Technique Using
                 Content-Induced Transaction Overlap",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "60--77",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}
%%% TOCS 2(1), Feb 1984: optimality of a processor scheduling policy.
@Article{Kameda:1984:OCP,
  author =       "Hisao Kameda",
  title =        "Optimality of a Central Processor Scheduling Policy
                 for Processing a Job Stream",
  journal =      j-TOCS,
  volume =       "2",
  number =       "1",
  pages =        "78--90",
  month =        feb,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}
%%% TOCS 2(2), May 1984: preface to the special issue.
@Article{Smith:1984:PSI,
  author =       "Alan Jay Smith",
  title =        "Preface to Special Issue",
  journal =      j-TOCS,
  volume =       "2",
  number =       "2",
  pages =        "91--92",
  month =        may,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 2(2), May 1984: generalized stochastic Petri nets.
@Article{Marsan:1984:CGS,
  author =       "Marco Ajmone Marsan and Gianni Conte and Gianfranco
                 Balbo",
  title =        "A Class of Generalized Stochastic {Petri} Nets for the
                 Performance Evaluation of Multiprocessor Systems",
  journal =      j-TOCS,
  volume =       "2",
  number =       "2",
  pages =        "93--122",
  month =        may,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Misc/Discrete.event.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/SE/uni-do.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "GSPN",
}
%%% TOCS 2(2), May 1984: performance of checkpointing strategies.
@Article{Tantawi:1984:PAC,
  author =       "Asser N. Tantawi and Manfred Ruschitzka",
  title =        "Performance Analysis of Checkpointing Strategies",
  journal =      j-TOCS,
  volume =       "2",
  number =       "2",
  pages =        "123--144",
  month =        may,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 2(2), May 1984: implementing fail-stop processors.
@Article{Schneider:1984:BGA,
  author =       "Fred B. Schneider",
  title =        "{Byzantine} Generals in Action: Implementing Fail-Stop
                 Processors",
  journal =      j-TOCS,
  volume =       "2",
  number =       "2",
  pages =        "145--154",
  month =        may,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Distributed/distfs.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/SE/dependability.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}
%%% TOCS 2(2), May 1984: static grouping of objects in paged virtual
%%% memory.
@Article{Stamos:1984:SGS,
  author =       "James W. Stamos",
  title =        "Static Grouping of Small Objects to Enhance
                 Performance of a Paged Virtual Memory",
  journal =      j-TOCS,
  volume =       "2",
  number =       "2",
  pages =        "155--180",
  month =        may,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Compiler/gc.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Parallel/distmem.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Smalltalk is an object-oriented, interactive
                 programming environment that maintains state between
                 user sessions. Because of the persistence of objects,
                 it is possible to use program restructuring techniques
                 to statically relocate objects in virtual memory.
                 Grouping related objects on the same disk page
                 increases locality of reference, reduces the number of
                 page faults, and improves performance. Five types of
                 static grouping algorithms along with the static
                 analysis performed on their outputs, and empirical
                 evidence of their performance are presented.",
  acknowledgement = ack-nhfb,
  comment =      "Using the garbage collector to improve performance",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "clustering TOCS",
}
%%% TOCS 2(3), Aug 1984: a fast file system for UNIX.
@Article{McKusick:1984:FFS,
  author =       "Marshall K. McKusick and William N. Joy and Sam J.
                 Leffler and Robert S. Fabry",
  key =          "McKusick et al.",
  title =        "A Fast File System for {UNIX}",
  journal =      j-TOCS,
  volume =       "2",
  number =       "3",
  pages =        "181--197",
  month =        aug,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Feb 7 10:11:41 1985",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/unix.1.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "A reimplementation of the UNIX file system is
                 described. The reimplementation provides substantially
                 higher throughput rates by using more flexible
                 allocation policies that allow better locality of
                 reference and can be adapted to a wide range of
                 peripheral and processor characteristics. The new file
                 system clusters data that is sequentially accessed and
                 provides two block sizes to allow fast access to large
                 files while not wasting large amounts of space for
                 small files. File access rates of up to ten times
                 faster than the traditional UNIX file system are
                 experienced. Long-needed enhancements to the
                 programmers' interface are discussed. These include a
                 mechanism to place advisory locks on files, extensions
                 of the name space across file systems, the ability to
                 use long file names, and provisions for administrative
                 control of resource usage.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS; UNIX, file system organization, file system
                 performance, file system design, application program
                 interface",
}
%%% TOCS 2(3), Aug 1984: security model for military message systems.
@Article{Landwehr:1984:SMM,
  author =       "Carl E. Landwehr and Constance L. Heitmeyer and John
                 McLean",
  title =        "A Security Model for Military Message Systems",
  journal =      j-TOCS,
  volume =       "2",
  number =       "3",
  pages =        "198--222",
  month =        aug,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Wiederhold/1984.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Misc/bibsec.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 2(3), Aug 1984: synchronizing shared abstract types.
@Article{Schwarz:1984:SSA,
  author =       "Peter M. Schwarz and Alfred Z. Spector",
  title =        "Synchronizing Shared Abstract Types",
  journal =      j-TOCS,
  volume =       "2",
  number =       "3",
  pages =        "223--250",
  month =        aug,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
%%% TOCS 2(3), Aug 1984: reliable broadcast protocols.
@Article{Chang:1984:RBP,
  author =       "Jo-Mei Chang and N. F. Maxemchuk",
  title =        "Reliable Broadcast Protocols",
  journal =      j-TOCS,
  volume =       "2",
  number =       "3",
  pages =        "251--273",
  month =        aug,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Compiler/gc.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Misc/misc.1.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/SE/dependability.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  comment =      "Atomic multicast protocol.",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "Consensus / Broadcast; Multicast; TOCS operating
                 distributed systems reliability networks
                 communication",
}
@Article{Anonymous:1984:IA,
  author =       "Anonymous",
  title =        "Information for Authors",
  journal =      j-TOCS,
  volume =       "2",
  number =       "3",
  pages =        "274--276",
  month =        aug,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:18:40 1999",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Saltzer:1984:EEA,
  author =       "J. H. Saltzer and D. P. Reed and D. D. Clark",
  key =          "Saltzer et al.",
  title =        "End-to-End Arguments in System Design",
  journal =      j-TOCS,
  volume =       "2",
  number =       "4",
  pages =        "277--288",
  month =        nov,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Mar 6 11:12:06 1985",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Misc/digital.library.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/os.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/SE/dependability.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "This paper presents a design principle that helps
                 guide placement of functions among the modules of a
                 distributed computer system. The principle, called the
                 end-to-end argument, suggests that functions placed at
                 low levels of a system may be redundant or of little
                 value when compared with the cost of providing them at
                 that low level. Examples discussed in the paper include
                 bit-error recovery, security using encryption,
                 duplicate message suppression, recovery from system
                 crashes, and delivery acknowledgement. Low-level
                 mechanisms to support these functions are justified
                 only as performance enhancements.",
  acknowledgement = ack-nhfb,
  comments =     "Argues that you should put functionality at the higher
                 app layers, rather than at low layers. Includes a
                 security example",
  entered-by =   "Andreas Paepcke",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "data communication; design; design principles;
                 protocol design; TOCS",
}
@Article{Smith:1984:DAE,
  author =       "James E. Smith",
  title =        "Decoupled Access\slash Execute Computer
                 Architectures",
  journal =      j-TOCS,
  volume =       "2",
  number =       "4",
  pages =        "289--308",
  month =        nov,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Math/sparse.linear.systems.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}
@Article{Tichy:1984:SSC,
  author =       "Walter F. Tichy",
  title =        "The String-to-String Correction Problem with Block
                 Moves",
  journal =      j-TOCS,
  volume =       "2",
  number =       "4",
  pages =        "309--321",
  month =        nov,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Misc/allison.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Misc/protein.pattern.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Uses block moves as the edit operation, seeks min'
                 number, gets an $O(n)$ linear algorithm if P. Weiner's
                 data-structure used. $S$ source string, $T$ target
                 string. Algorithm: find longest prefix of $T$ that is a
                 substring of $S$, this gives the first block move;
                 repeat until done. Proof: by induction on number of
                 block moves. One block move - $T$ must obviously be a
                 substring of $S$, alg' finds this. Suppose optimal is
                 $i$ block moves and alg' fails, i.e., finds $j > i$
                 moves.
                 T:----Opt1----$|$--Opt2--$|$---Opt3---$|$.....$|$--Algi--
                 T:-----Alg1-----$|$---Alg2---$|$.....$|$-----Algj------
                 NB. $|$Alg1$|$ $\geq$ $|$Opt1$|$ Delete the substring
                 Alg1. By induction the alg' would find the opt'
                 explanation of the rest of $T$ - but it doesn't :-
                 contradiction.",
  acknowledgement = ack-nhfb,
  comment =      "``An algorithm that produces the shortest edit
                 sequence transforming one string into another is
                 presented. The algorithm is optimal in the sense that
                 it generates a minimal covering set of common
                 substrings of one string with respect to another. Two
                 improvements of the basic algorithm are developed. The
                 first improvement performs well on strings with few
                 replicated symbols. The second improvement runs in time
                 and space linear to the size of the input. Efficient
                 algorithms for regenerating a string from an edit
                 sequence are also presented.'' longest common
                 sequence",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "longest common subsequence, LCS, LCSS, edit distance,
                 block, move, string to strings, sequence,
                 alignment, linear, algorithm; TOCS",
}
@Article{Rom:1984:OSC,
  author =       "Raphael Rom",
  title =        "Ordering Subscribers on Cable Networks",
  journal =      j-TOCS,
  volume =       "2",
  number =       "4",
  pages =        "322--334",
  month =        nov,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}
@Article{Bryant:1984:MPA,
  author =       "Raymond M. Bryant and Anthony E. Krzesinski and M.
                 Seetha Lakshmi and K. Mani Chandy",
  title =        "The {MVA} Priority Approximation",
  journal =      j-TOCS,
  volume =       "2",
  number =       "4",
  pages =        "335--359",
  month =        nov,
  year =         "1984",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 11:57:59 1999",
  bibsource =    "ftp://ftp.ira.uka.de/pub/bibliography/Database/Graefe.bib;
                 ftp://ftp.ira.uka.de/pub/bibliography/Os/IMMD_IV.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "TOCS",
}
@Article{Birrell:1985:SCU,
  author =       "Andrew D. Birrell",
  title =        "Secure Communication Using Remote Procedure Calls",
  journal =      j-TOCS,
  volume =       "3",
  number =       "1",
  pages =        "1--14",
  month =        feb,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-1/p1-birrell/",
  abstract =     "Research on encryption-based secure communication
                 protocols has reached a stage where it is feasible to
                 construct end-to-end secure protocols. The design of
                 such a protocol, built as part of a remote procedure
                 call package, is described. The security abstraction
                 presented to users of the package, the authentication
                 mechanisms, and the protocol for encrypting and
                 verifying remote calls are also described.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Xerox Corp",
  classification = "723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "computer networks; cryptography; design;
                 experimentation; Protocols; remote procedure calls;
                 secure communication; security",
  subject =      "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and
                 Protection, Cryptographic controls. {\bf C.2.0}
                 Computer Systems Organization, COMPUTER-COMMUNICATION
                 NETWORKS, General, Security and protection (e.g.,
                 firewalls). {\bf C.2.2} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
                 Protocol architecture.",
}
@Article{Skeen:1985:DLP,
  author =       "Dale Skeen",
  title =        "Determining the Last Process to Fail",
  journal =      j-TOCS,
  volume =       "3",
  number =       "1",
  pages =        "15--30",
  month =        feb,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-1/p15-skeen/",
  abstract =     "A total failure occurs whenever all processes
                 cooperatively executing a distributed task fail before
                 the task completes. A frequent prerequisite for
                 recovery from a total failure is identification of the
                 last set (LAST) of processes to fail. Necessary and
                 sufficient conditions are derived here for computing
                 LAST from the local failure data of recovered
                 processes. These conditions are then translated into
                 procedures for deciding LAST membership, using either
                 complete or incomplete failure data. The choice of
                 failure data is itself dictated by two requirements:
                 (1) it can be cheaply maintained, and (2) it must
                 afford maximum fault-tolerance in the sense that the
                 expected number of recoveries required for identifying
                 LAST is minimized.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Cornell Univ, Ithaca, NY, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; computer systems, digital; cooperative
                 processes; database systems --- Distributed;
                 Distributed; event ordering; reliability; total
                 failure",
  subject =      "{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability,
                 Fault-tolerance. {\bf C.2.4} Computer Systems
                 Organization, COMPUTER-COMMUNICATION NETWORKS,
                 Distributed Systems. {\bf C.4} Computer Systems
                 Organization, PERFORMANCE OF SYSTEMS, Reliability,
                 availability, and serviceability. {\bf D.4.5} Software,
                 OPERATING SYSTEMS, Reliability, Checkpoint/restart.
                 {\bf H.2.2} Information Systems, DATABASE MANAGEMENT,
                 Physical Design, Recovery and restart.",
}
@Article{Clark:1985:PVT,
  author =       "Douglas W. Clark and Joel S. Emer",
  title =        "Performance of the {VAX-11\slash 780} Translation
                 Buffer: Simulation and Measurement",
  journal =      j-TOCS,
  volume =       "3",
  number =       "1",
  pages =        "31--62",
  month =        feb,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-1/p31-clark/",
  abstract =     "A virtual-address translation buffer (TB) is a
                 hardware cache of recently used virtual-to-physical
                 address mappings. The authors present the results of a
                 set of measurements and simulations of translation
                 buffer performance in the VAX-11\slash 780. Two
                 different hardware monitors were attached to
                 VAX-11\slash 780 computers, and translation buffer
                 behavior was measured. Measurements were made under
                 normal time-sharing use and while running reproducible
                 synthetic time-sharing work loads. Reported
                 measurements include the miss ratios of data and
                 instruction references, the rate of TB invalidations
                 due to context switches, and the amount of time taken
                 to service TB misses. Additional hardware measurements
                 were made with half the TB disabled. Trace-driven
                 simulations of several programs were also run; the
                 traces captured system activity as well as user-mode
                 execution. Several variants of the 11\slash 780 TB
                 structure were simulated.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Digital Equipment Corp, Littleton, MA, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "cache memories; computer simulation; computers,
                 digital --- Performance; data storage units; design;
                 experimentation; hardware monitor; measurement;
                 performance; trace-driven simulation; translation
                 buffer",
  subject =      "{\bf C.1.1} Computer Systems Organization, PROCESSOR
                 ARCHITECTURES, Single Data Stream Architectures, VAX.
                 {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles,
                 Associative memories. {\bf B.3.2} Hardware, MEMORY
                 STRUCTURES, Design Styles, Cache memories. {\bf B.3.2}
                 Hardware, MEMORY STRUCTURES, Design Styles, Virtual
                 memory. {\bf B.3.3} Hardware, MEMORY STRUCTURES,
                 Performance Analysis and Design Aids**, Simulation**.",
}
@Article{Chandy:1985:DSD,
  author =       "K. Mani Chandy and Leslie Lamport",
  title =        "Distributed Snapshots: Determining Global States of
                 Distributed Systems",
  journal =      j-TOCS,
  volume =       "3",
  number =       "1",
  pages =        "63--75",
  month =        feb,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-1/p63-chandy/",
  abstract =     "This paper presents an algorithm by which a process in
                 a distributed system determines a global state of the
                 system during a computation. Many problems in
                 distributed systems can be cast in terms of the problem
                 of detecting global states. For instance, the global
                 state detection algorithm helps to solve an important
                 class of problems: stable property detection. A stable
                 property is one that persists: once a stable property
                 becomes true it remains true thereafter. Examples of
                 stable properties are `computation has terminated',
                 `the system is deadlocked' and `all tokens in a token
                 ring have disappeared.' The stable property detection
                 problem is that of devising algorithms to detect a
                 given stable property. Global state detection can also
                 be used for checkpointing.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Univ of Texas at Austin, Austin, TX, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; computer programming --- Algorithms;
                 computer systems, digital; Distributed; distributed
                 deadlock detection; distributed snapshots; global
                 states",
  subject =      "{\bf C.2.4} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.
                 {\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management, Concurrency. {\bf D.4.1} Software,
                 OPERATING SYSTEMS, Process Management, Deadlocks. {\bf
                 D.4.1} Software, OPERATING SYSTEMS, Process Management,
                 Multiprocessing/multiprogramming/multitasking. {\bf
                 D.4.1} Software, OPERATING SYSTEMS, Process Management,
                 Mutual exclusion. {\bf D.4.1} Software, OPERATING
                 SYSTEMS, Process Management, Scheduling. {\bf D.4.1}
                 Software, OPERATING SYSTEMS, Process Management,
                 Synchronization. {\bf D.4.5} Software, OPERATING
                 SYSTEMS, Reliability, Backup procedures. {\bf D.4.5}
                 Software, OPERATING SYSTEMS, Reliability,
                 Checkpoint/restart. {\bf D.4.5} Software, OPERATING
                 SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.5}
                 Software, OPERATING SYSTEMS, Reliability,
                 Verification.",
}
@Article{Cheriton:1985:DPG,
  author =       "David R. Cheriton and Willy Zwaenepoel",
  title =        "Distributed Process Groups in the {V} Kernel",
  journal =      j-TOCS,
  volume =       "3",
  number =       "2",
  pages =        "77--107",
  month =        may,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-2/p77-cheriton/",
  abstract =     "The V kernel supports an abstraction of processes,
                 with operations for interprocess communication, process
                 management, and memory management. This abstraction is
                 used as a software base for constructing distributed
                 systems. As a distributed kernel, the V kernel makes
                 intermachine boundaries largely transparent. In this
                 environment of many cooperating processes on different
                 machines, there are many logical groups of processes.
                 In this paper we describe the extension of the V kernel
                 to support process groups. Operations on groups include
                 group interprocess communication. Aspects of the
                 implementation and performance, and initial experience
                 with applications are discussed.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Stanford Univ, Stanford, CA, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; computer operating systems; computer
                 systems, digital --- Distributed; design; distributed
                 process groups; measurement; performance; V kernel",
  subject =      "{\bf D.4.4} Software, OPERATING SYSTEMS,
                 Communications Management. {\bf C.2.4} Computer Systems
                 Organization, COMPUTER-COMMUNICATION NETWORKS,
                 Distributed Systems. {\bf D.4.7} Software, OPERATING
                 SYSTEMS, Organization and Design.",
}
@Article{Even:1985:PCC,
  author =       "S. Even and O. Goldreich",
  title =        "On the Power of Cascade Ciphers",
  journal =      j-TOCS,
  volume =       "3",
  number =       "2",
  pages =        "108--116",
  month =        may,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-2/p108-even/",
  abstract =     "The unicity distance of a cascade of random ciphers,
                 with respect to known plaintext attack, is shown to be
                 the sum of the key lengths. A time-space trade-off for
                 the exhaustive cracking of a cascade of ciphers is
                 shown. The structure of the set of permutations
                 realized by a cascade is studied; it is shown that only
                 $l \cdot 2^k$ exhaustive experiments are necessary to
                 determine the behavior of a cascade of $l$ stages, each
                 having $k$ key bits. It is concluded that the cascade
                 of random ciphers is not a random cipher. Yet, it is
                 shown that, with high probability, the number of
                 permutations realizable by a cascade of $l$ random
                 ciphers, each having $k$ key bits, is $2^{lk}$. Next,
                 it is shown that two stages are not worse than one, by
                 a simple reduction of the cracking problem of any of
                 the stages to the cracking problem of the cascade.
                 Finally, it is shown that proving a nonpolynomial lower
                 bound on the cracking problem of long cascades is a
                 hard task, since such a bound implies that P does not
                 equal NP.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Technion-Israel Inst of Technology, Haifa, Isr",
  classification = "723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; cascade ciphers; cryptography; data
                 encryption; data processing --- Security of Data;
                 random ciphers; security; theory; unicity distance",
  subject =      "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and
                 Protection. {\bf E.3} Data, DATA ENCRYPTION.",
}
@Article{Padmanabhan:1985:PAR,
  author =       "Krishnan Padmanabhan and Duncan H. Lawrie",
  title =        "Performance Analysis of Redundant-Path Networks for
                 Multiprocessor Systems",
  journal =      j-TOCS,
  volume =       "3",
  number =       "2",
  pages =        "117--144",
  month =        may,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-2/p117-padmanabhan/",
  abstract =     "Performance of a class of multistage interconnection
                 networks employing redundant paths is investigated.
                 Redundant path networks provide significant tolerance
                 to faults at minimal costs; in this paper improvements
                 in performance and very graceful degradation are also
                 shown to result from the availability of redundant
                 paths. A Markov model is introduced for the operation
                 of these networks in the circuit-switched mode and is
                 solved numerically to obtain the performance measures
                 of interest. The structure of the networks that provide
                 maximal performance is also characterized.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Univ of Illinois at Urbana-Champaign, Urbana,
                 IL, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "computer systems, digital; design; Multiprocessing;
                 multistage interconnection networks; performance;
                 performance analysis; redundant-path networks",
  subject =      "{\bf C.4} Computer Systems Organization, PERFORMANCE
                 OF SYSTEMS, Performance attributes. {\bf C.1.2}
                 Computer Systems Organization, PROCESSOR ARCHITECTURES,
                 Multiple Data Stream Architectures (Multiprocessors),
                 Interconnection architectures. {\bf C.1.2} Computer
                 Systems Organization, PROCESSOR ARCHITECTURES, Multiple
                 Data Stream Architectures (Multiprocessors),
                 Multiple-instruction-stream, multiple-data-stream
                 processors (MIMD). {\bf C.1.2} Computer Systems
                 Organization, PROCESSOR ARCHITECTURES, Multiple Data
                 Stream Architectures (Multiprocessors), Parallel
                 processors**. {\bf C.4} Computer Systems Organization,
                 PERFORMANCE OF SYSTEMS, Design studies. {\bf C.4}
                 Computer Systems Organization, PERFORMANCE OF SYSTEMS,
                 Modeling techniques.",
}
@Article{Maekawa:1985:AME,
  author =       "Mamoru Maekawa",
  title =        "A {$\sqrt{N}$} Algorithm for Mutual Exclusion in
                 Decentralized Systems",
  journal =      j-TOCS,
  volume =       "3",
  number =       "2",
  pages =        "145--159",
  month =        may,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-2/p145-maekawa/",
  abstract =     "An algorithm is presented that uses only $c \sqrt{N}$
                 messages to create mutual exclusion in a computer
                 network, where $N$ is the number of nodes and $c$ a
                 constant between 3 and 5. The algorithm is symmetric
                 and allows fully parallel operation.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Univ of Tokyo, Dep of Information Science,
                 Tokyo, Jpn",
  classification = "723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; computer networks; computer programming
                 --- Algorithms; decentralized systems; design; mutual
                 exclusion; performance",
  subject =      "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management, Mutual exclusion. {\bf C.2.1} Computer
                 Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
                 Network Architecture and Design, Network
                 communications. {\bf C.2.4} Computer Systems
                 Organization, COMPUTER-COMMUNICATION NETWORKS,
                 Distributed Systems, Network operating systems.",
}
@Article{Smith:1985:DCM,
  author =       "Alan Jay Smith",
  title =        "Disk Cache --- Miss Ratio Analysis and Design
                 Considerations",
  journal =      j-TOCS,
  volume =       "3",
  number =       "3",
  pages =        "161--203",
  month =        aug,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-3/p161-smith/",
  abstract =     "The current trend of computer system technology is
                 toward CPUs with rapidly increasing processing power
                 and toward disk drives of rapidly increasing density,
                 but with disk performance increasing very slowly if at
                 all. The implication of these trends is that at some
                 point the processing power of computer systems will be
                 limited by the throughput of the input\slash output
                 (I/O) system. A solution to this problem, which is
                 described and evaluated in this paper, is disk cache.
                 The idea is to buffer recently used portions of the
                 disk address space in electronic storage. Experimental
                 results are based on extensive trace-driven simulations
                 using traces taken from three large IBM or
                 IBM-compatible mainframe data processing installations.
                 We find that disk cache is a powerful means of
                 extending the performance limits of high-end computer
                 systems.",
  acknowledgement = ack-nhfb,
  affiliation =  "Univ of California, Dep of Electrical Engineering \&
                 Computer Sciences, Berkeley, CA, USA",
  affiliationaddress = "Univ of California, Dep of Electrical
                 Engineering \& Computer Sciences, Berkeley, CA, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "cache controller; computer systems, digital; data
                 storage units; design; disk cache; experimentation; I/O
                 buffer; measurement; performance",
  subject =      "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
                 Styles, Cache memories. {\bf B.4.2} Hardware,
                 INPUT/OUTPUT AND DATA COMMUNICATIONS, Input/Output
                 Devices, Channels and controllers. {\bf B.3.2}
                 Hardware, MEMORY STRUCTURES, Design Styles, Mass
                 storage. {\bf B.3.3} Hardware, MEMORY STRUCTURES,
                 Performance Analysis and Design Aids**. {\bf D.4.2}
                 Software, OPERATING SYSTEMS, Storage Management.",
}
@Article{Strom:1985:ORD,
  author =       "Robert E. Strom and Shaula Yemini",
  title =        "Optimistic Recovery in Distributed Systems",
  journal =      j-TOCS,
  volume =       "3",
  number =       "3",
  pages =        "204--226",
  month =        aug,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-3/p204-strom/",
  abstract =     "Optimistic Recovery is a new technique supporting
                 application-independent transparent recovery from
                 processor failures in distributed systems. In
                 optimistic recovery communication, computation and
                 checkpointing proceed asynchronously. Synchronization
                 is replaced by causal dependency tracking, which
                 enables a posteriori reconstruction of a consistent
                 distributed system state following a failure using
                 process rollback and message replay. Because there is
                 no synchronization among computation, communication,
                 and checkpointing, optimistic recovery can tolerate the
                 failure of an arbitrary number of processors and yields
                 better throughput and response time than other general
                 recovery techniques whenever failures are infrequent.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "IBM, Thomas J. Watson Research Cent, Yorktown
                 Heights, NY, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; computer operating systems; computer
                 programming --- Algorithms; computer systems, digital;
                 Distributed; optimistic algorithms; optimistic
                 recovery; reliability; verification",
  subject =      "{\bf C.2.4} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
                 Network operating systems. {\bf D.4.5} Software,
                 OPERATING SYSTEMS, Reliability. {\bf D.4.7} Software,
                 OPERATING SYSTEMS, Organization and Design, Distributed
                 systems. {\bf C.1.2} Computer Systems Organization,
                 PROCESSOR ARCHITECTURES, Multiple Data Stream
                 Architectures (Multiprocessors). {\bf D.1.3} Software,
                 PROGRAMMING TECHNIQUES, Concurrent Programming.",
}
@Article{Tay:1985:EBP,
  author =       "Y. C. Tay and Rajan Suri",
  title =        "Error Bounds for Performance Prediction in Queuing
                 Networks",
  journal =      j-TOCS,
  volume =       "3",
  number =       "3",
  pages =        "227--254",
  month =        aug,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-3/p227-tay/",
  abstract =     "Analytic models based on closed queuing networks
                 (CQNs) are widely used for performance prediction in
                 practical systems. In using such models, there is
                 always a prediction error, that is, a difference
                 between the predicted performance and actual outcome.
                 This prediction error is due both to modeling errors
                 and estimation errors, the latter being the difference
                 between the estimated values of the CQN parameters and
                 the actual outcomes. This paper considers the second
                 class of errors; in particular, it studies the effect
                 of small estimation errors and provides bounds on
                 prediction errors based on bounds on estimation errors.
                 Estimation errors may be divided into two types: (1)
                 the difference between the estimated value and the
                 average value of the outcome, and (2) the deviation of
                 the actual value from its average. The analysis first
                 studies the sum of both types of errors, then the
                 second type alone. The results are illustrated with
                 three examples.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Natl Univ of Singapore, Dep of Mathematics, Kent
                 Ridge, Singapore",
  classification = "722; 723; 922",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "closed queuing networks; computer systems, digital;
                 error bounds; measurement; performance; Performance;
                 probability --- Queueing Theory; product form networks;
                 queuing networks; verification",
  subject =      "{\bf C.4} Computer Systems Organization, PERFORMANCE
                 OF SYSTEMS, Modeling techniques. {\bf D.4.8} Software,
                 OPERATING SYSTEMS, Performance, Stochastic analysis.
                 {\bf D.4.8} Software, OPERATING SYSTEMS, Performance,
                 Queueing theory. {\bf D.4.8} Software, OPERATING
                 SYSTEMS, Performance, Modeling and prediction.",
}
@Article{Brown:1985:AFS,
  author =       "Mark R. Brown and Karen N. Kolling and Edward A.
                 Taft",
  title =        "The {Alpine} File System",
  journal =      j-TOCS,
  volume =       "3",
  number =       "4",
  pages =        "261--293",
  month =        nov,
  year =         "1985",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-4/p261-brown/",
  abstract =     "Alpine is a file system that supports atomic
                 transactions and is designed to operate as a service on
                 a computer network. Alpine's primary purpose is to
                 store files that represent databases. An important
                 secondary goal is to store ordinary files representing
                 documents, program modules, and the like. Unlike other
                 file servers described in the literature, Alpine uses a
                 log-based technique to implement atomic file update.
                 Another unusual aspect of Alpine is that it performs
                 all communication via a general-purpose remote
                 procedure call facility. Both of these decisions have
                 worked out well. This paper describes Alpine's design
                 and implementation, and evaluates the system in light
                 of our experience to date. The Cedar language and
                 programming environment is used to develop Alpine.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Xerox Corp, USA",
  classification = "723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "alpine; cedar; computer networks; computer programming
                 languages; database systems; design; experimentation;
                 file servers; file system; reliability",
  subject =      "{\bf D.4.0} Software, OPERATING SYSTEMS, General,
                 Alpine. {\bf D.4.5} Software, OPERATING SYSTEMS,
                 Reliability, Checkpoint/restart. {\bf D.4.3} Software,
                 OPERATING SYSTEMS, File Systems Management. {\bf D.3.2}
                 Software, PROGRAMMING LANGUAGES, Language
                 Classifications, Cedar. {\bf D.4.5} Software, OPERATING
                 SYSTEMS, Reliability, Backup procedures. {\bf D.4.7}
                 Software, OPERATING SYSTEMS, Organization and Design,
                 Distributed systems. {\bf H.2.4} Information Systems,
                 DATABASE MANAGEMENT, Systems, Distributed databases.",
}
@Article{DeMori:1985:RAB,
  author             = {{De Mori}, Renato and Cardin, R{\'e}gis},
  title              = {A Recursive Algorithm for Binary Multiplication and
    its Implementation},
  journal            = j-TOCS,
  volume             = {3},
  number             = {4},
  pages              = {294--314},
  month              = nov,
  year               = {1985},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-4/p294-de_mori/},
  abstract           = {A new recursive algorithm for deriving the layout of
    parallel multipliers is presented. Based on this
    algorithm, a network for performing multiplications of
    two's complement numbers is proposed. The network can
    be implemented in a synchronous or an asynchronous way.
    If the factors to be multiplied have N bits, the area
    complexity of the network is O(N**2) for practical
    values of N as in the case of cellular multipliers. Due
    to the design approach based on a recursive algorithm,
    a time complexity O(log N) is achieved. It is shown how
    the structure can be pipelined with period complexity
    O(1) and used for single and double precision
    multiplication.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Concordia Univ, Dep of Computer Science,
    Montreal, Que, Can},
  classification     = {722; 723},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {algorithms; Algorithms; binary multiplication;
    complexity; computer programming; computer systems,
    digital --- Parallel Processing; design; performance;
    recursive algorithm},
  subject            = {{\bf B.2.1} Hardware, ARITHMETIC AND LOGIC STRUCTURES,
    Design Styles, Parallel. {\bf B.2.1} Hardware,
    ARITHMETIC AND LOGIC STRUCTURES, Design Styles,
    Pipeline. {\bf C.5.4} Computer Systems Organization,
    COMPUTER SYSTEM IMPLEMENTATION, VLSI Systems.},
}
@Article{Chow:1985:DCM,
  author             = {Chow, Ching-Hua and Gouda, Mohamed G. and Lam, Simon S.},
  title              = {A Discipline for Constructing Multiphase Communication
    Protocols},
  journal            = j-TOCS,
  volume             = {3},
  number             = {4},
  pages              = {315--343},
  month              = nov,
  year               = {1985},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-4/p315-chow/},
  abstract           = {Many communication protocols can be observed to go
    through different phases performing a distinct function
    in each phase. A multiphase model for such protocols is
    presented. A phase is formally defined to be a network
    of communicating finite-state machines with certain
    desirable correctness properties; these include proper
    termination and freedom from deadlocks and unspecified
    receptions. A multifunction protocol is constructed by
    first constructing separate phases to perform its
    different functions. It is shown how to connect these
    phases together to realize the multifunction protocol
    so that the resulting network of communicating finite
    state machines is also a phase (i. e., it possesses the
    desirable properties defined for phases). The
    modularity inherent in multiphase protocols facilitates
    not only their construction but also their
    understanding and modification. An abundance of
    protocols have been found in the literature that can be
    constructed as multiphase protocols. Three examples are
    presented here: two versions of IBM's BSC protocol for
    data link control and a token ring network protocol.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Univ of Texas at Austin, Dep of Computer
    Sciences, Austin, TX, USA},
  classification     = {723},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {algorithms; BSC protocols; computer networks; design;
    multiphase communication protocols; Protocols; theory;
    token ring network protocol; verification},
  subject            = {{\bf C.2.2} Computer Systems Organization,
    COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
    Protocol architecture. {\bf B.4.4} Hardware,
    INPUT/OUTPUT AND DATA COMMUNICATIONS, Performance
    Analysis and Design Aids**, Formal models**. {\bf
    B.4.4} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS,
    Performance Analysis and Design Aids**, Verification**.
    {\bf C.2.2} Computer Systems Organization,
    COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
    Protocol verification. {\bf D.1.3} Software,
    PROGRAMMING TECHNIQUES, Concurrent Programming. {\bf
    D.2.2} Software, SOFTWARE ENGINEERING, Design Tools and
    Techniques, Modules and interfaces. {\bf D.2.2}
    Software, SOFTWARE ENGINEERING, Design Tools and
    Techniques, Structured programming**. {\bf D.2.4}
    Software, SOFTWARE ENGINEERING, Software/Program
    Verification, Correctness proofs. {\bf D.2.4} Software,
    SOFTWARE ENGINEERING, Software/Program Verification,
    Validation.},
}
@Article{Suzuki:1985:DME,
  author             = {Suzuki, Ichiro and Kasami, Tadao},
  title              = {A Distributed Mutual Exclusion Algorithm},
  journal            = j-TOCS,
  volume             = {3},
  number             = {4},
  pages              = {344--349},
  month              = nov,
  year               = {1985},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1985-3-4/p344-suzuki/},
  abstract           = {A distributed algorithm is presented that realizes
    mutual exclusion among N nodes in a computer network.
    The algorithm requires at most N message exchanges for
    one mutual exclusion invocation. Accordingly, the delay
    to invoke mutual exclusion is smaller than in an
    algorithm of Ricart and Agrawala, which requires
    2*(N-1) message exchanges per invocation. A drawback of
    the algorithm is that the sequence numbers contained in
    the messages are unbounded. It is shown that this
    problem can be overcome by slightly increasing the
    number of message exchanges.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Osaka Univ, Toyonaka, Jpn},
  classification     = {723},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {Algorithms; algorithms; computer networks; computer
    programming; distributed mutual exclusion algorithm;
    message exchange; process management},
  subject            = {{\bf D.4.1} Software, OPERATING SYSTEMS, Process
    Management, Mutual exclusion. {\bf C.2.4} Computer
    Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
    Distributed Systems.},
}
@Article{Smith:1986:IGP,
  author             = {Smith, Connie U.},
  title              = {Independent General Principles for Constructing
    Responsive Software Systems},
  journal            = j-TOCS,
  volume             = {4},
  number             = {1},
  pages              = {1--31},
  month              = feb,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-1/p1-smith/},
  abstract           = {Three general principles are presented that can be
    applied in early software life cycle stages for the
    definition of software requirements and designs with
    acceptable performance. They are genuine high-level
    considerations for meeting responsiveness goals without
    sacrificing understandability and maintainability, and
    without increasing development time and cost. The
    principles are derived from the interrelationships of
    two performance models: a queueing network based on
    computer system model and an execution graph software
    model. The performance effect of each of the principles
    is quantified using the models. Examples are given that
    illustrate how they can be applied to software
    systems.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Duke Univ, Dep of Computer Science, Durham, NC,
    USA},
  classification     = {722; 723; 921},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {computer software; computer systems, digital ---
    Performance; design; mathematical models; Performance;
    performance},
  subject            = {{\bf C.4} Computer Systems Organization, PERFORMANCE
    OF SYSTEMS, Design studies. {\bf D.2.10} Software,
    SOFTWARE ENGINEERING, Design**. {\bf C.4} Computer
    Systems Organization, PERFORMANCE OF SYSTEMS,
    Performance attributes. {\bf D.0} Software, GENERAL.
    {\bf D.2.9} Software, SOFTWARE ENGINEERING, Management,
    Life cycle. {\bf D.4.8} Software, OPERATING SYSTEMS,
    Performance, Modeling and prediction. {\bf D.4.8}
    Software, OPERATING SYSTEMS, Performance, Operational
    analysis.},
}
@Article{Herlihy:1986:QCR,
  author             = {Herlihy, Maurice},
  title              = {A Quorum-Consensus Replication Method for Abstract
    Data Types},
  journal            = j-TOCS,
  volume             = {4},
  number             = {1},
  pages              = {32--53},
  month              = feb,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-1/p32-herlihy/},
  abstract           = {Replication can enhance the availability of data in
    distributed systems. This paper introduces a new method
    for managing replicated data. Unlike many methods that
    support replication only for uninterpreted files, this
    method systematically exploits type-specific properties
    of objects such as sets, queues, or directories to
    provide more effective replication. Each operation
    requires the cooperation of a certain number of sites
    for its successful completion. A quorum for an
    operation is any such set of sites. Necessary and
    sufficient constraints on quorum intersections are
    derived from an analysis of the data type's algebraic
    structure. A reconfiguration method is proposed that
    permits quorums to be changed dynamically. By taking
    advantage of type-specific properties in a general and
    systematic way, this method can realize a wider range
    of availability properties and more flexible
    reconfiguration than comparable replication methods.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Carnegie-Mellon Univ, Computer Science Dep,
    Pittsburgh, PA, USA},
  classification     = {723},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {abstract data types; algorithms; computer operating
    systems; computer programming languages; database
    systems; reliability; replication method;
    verification},
  subject            = {{\bf D.1.3} Software, PROGRAMMING TECHNIQUES,
    Concurrent Programming. {\bf D.3.3} Software,
    PROGRAMMING LANGUAGES, Language Constructs and
    Features, Abstract data types. {\bf D.4.3} Software,
    OPERATING SYSTEMS, File Systems Management, Distributed
    file systems. {\bf D.4.5} Software, OPERATING SYSTEMS,
    Reliability, Fault-tolerance. {\bf H.2.4} Information
    Systems, DATABASE MANAGEMENT, Systems, Distributed
    databases. {\bf H.2.4} Information Systems, DATABASE
    MANAGEMENT, Systems, Transaction processing.},
}
@Article{Joseph:1986:LCM,
  author             = {Joseph, Thomas A. and Birman, Kenneth P.},
  title              = {Low Cost Management of Replicated Data in
    Fault-Tolerant Distributed Systems},
  journal            = j-TOCS,
  volume             = {4},
  number             = {1},
  pages              = {54--70},
  month              = feb,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-1/p54-joseph/},
  abstract           = {Many distributed systems replicate data for fault
    tolerance or availability. In such systems, a logical
    update on a data item results in a physical update on a
    number of copies. The synchronization and communication
    required to keep the copies of replicated data
    consistent introduce a delay when operations are
    performed. In this paper, we describe a technique that
    relaxes the usual degree of synchronization, permitting
    replicated data items to be updated concurrently with
    other operations, while at the same time ensuring that
    correctness is not violated. The additional concurrency
    thus obtained results in better response time when
    performing operations on replicated data. We also
    discuss how this technique performs in conjunction with
    a roll-back and a roll-forward failure recovery
    mechanism.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Cornell Univ, Dep of Computer Science, Ithaca,
    NY, USA},
  classification     = {722; 723},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {algorithms; computer systems, digital --- Distributed;
    database systems; fault-tolerant distributed systems;
    reliability; replicated data; roll-forward recovery;
    update},
  subject            = {{\bf C.2.4} Computer Systems Organization,
    COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
    Distributed databases. {\bf D.4.1} Software, OPERATING
    SYSTEMS, Process Management, Concurrency. {\bf D.4.1}
    Software, OPERATING SYSTEMS, Process Management,
    Synchronization. {\bf D.4.5} Software, OPERATING
    SYSTEMS, Reliability, Checkpoint/restart. {\bf D.4.5}
    Software, OPERATING SYSTEMS, Reliability,
    Fault-tolerance. {\bf H.2.2} Information Systems,
    DATABASE MANAGEMENT, Physical Design, Recovery and
    restart. {\bf H.2.4} Information Systems, DATABASE
    MANAGEMENT, Systems, Transaction processing.},
}
@Article{Kameda:1986:EJL,
  author             = {Kameda, Hisao},
  title              = {Effects of Job Loading Policies for Multiprogramming
    Systems in Processing a Job Stream},
  journal            = j-TOCS,
  volume             = {4},
  number             = {1},
  pages              = {71--106},
  month              = feb,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-1/p71-kameda/},
  abstract           = {The scheduling of jobs for multiprogramming systems
    includes the selection of jobs to be loaded into memory
    (job loading policy or memory schedule) and the
    scheduling for CPU processing (CPU schedule). There has
    been a successful empirical claim for the optimal CPU
    schedule; its optimality has been proved in a Markovian
    model of job-stream processing that uses the
    first-come-first-loaded (FCFL) job loading policy. We
    extend this model to gain insight into the effects of
    job loading policies. Our investigation, supported by
    numerical calculations, suggests that much more care
    may be needed in implementing the job loading policy
    that aims at the optimal processing capacity than in
    implementing the optimal CPU schedule. This agrees with
    what has been conjectured on the basis of empirical
    studies.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Univ of Electro-Communications, Dep of Computer
    Science, Chofu, Jpn},
  classification     = {723},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {computer operating systems; computer systems
    programming; finite memory size model; job loading
    policies; multiple-resource system; performance;
    theory; throughput},
  subject            = {{\bf C.4} Computer Systems Organization, PERFORMANCE
    OF SYSTEMS, Modeling techniques. {\bf D.4.1} Software,
    OPERATING SYSTEMS, Process Management, Scheduling. {\bf
    D.4.2} Software, OPERATING SYSTEMS, Storage Management,
    Allocation/deallocation strategies. {\bf D.4.8}
    Software, OPERATING SYSTEMS, Performance, Modeling and
    prediction. {\bf D.4.8} Software, OPERATING SYSTEMS,
    Performance, Queueing theory. {\bf D.4.8} Software,
    OPERATING SYSTEMS, Performance, Stochastic analysis.
    {\bf D.4.1} Software, OPERATING SYSTEMS, Process
    Management,
    Multiprocessing/multiprogramming/multitasking.},
}
@Article{Carriero:1986:NLK,
  author             = {Carriero, Nicholas and Gelernter, David},
  title              = {The {S/Net}'s {Linda} Kernel},
  journal            = j-TOCS,
  volume             = {4},
  number             = {2},
  pages              = {110--129},
  month              = may,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-2/p110-carriero/},
  abstract           = {Linda is a parallel programming language that differs
    from other parallel languages in its simplicity and in
    its support for distributed data structures. The S/Net
    is a multicomputer, designed and built at AT\&T Bell
    Laboratories, that is based on a fast, word-parallel
    bus interconnect. We describe the Linda-supporting
    communication kernel we have implemented on the S/Net.
    The implementation suggests that Linda's unusual
    shared-memory-like communication primitives can be made
    to run well in the absence of physically shared memory;
    the simplicity of the language and of our
    implementation's logical structure suggest that similar
    Linda implementations might readily be constructed on
    related architectures. We outline the language, and
    programming methodologies based on distributed data
    structures; we then describe the implementation, and
    the performance both of the Linda primitives themselves
    and of a simple S/Net-Linda matrix-multiplication
    program designed to exercise them.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Yale Univ, New Haven, CT, USA},
  classification     = {722; 723},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {communication kernel; computer programming languages;
    computer systems, digital --- Parallel Processing; data
    processing --- Data Structures; design; languages;
    Linda parallel programming languages; S/Net},
  subject            = {{\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language
    Constructs and Features, Concurrent programming
    structures. {\bf C.2.1} Computer Systems Organization,
    COMPUTER-COMMUNICATION NETWORKS, Network Architecture
    and Design, Network communications. {\bf C.2.4}
    Computer Systems Organization, COMPUTER-COMMUNICATION
    NETWORKS, Distributed Systems, Network operating
    systems. {\bf D.4.4} Software, OPERATING SYSTEMS,
    Communications Management, Message sending.},
}
@Article{Kronenberg:1986:VCC,
  author             = {Kronenberg, Nancy P. and Levy, Henry M. and
    Strecker, William D.},
  title              = {{VAXclusters}: a Closely-Coupled Distributed System},
  journal            = j-TOCS,
  volume             = {4},
  number             = {2},
  pages              = {130--146},
  month              = may,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-2/p130-kronenberg/},
  abstract           = {A VAXcluster is a highly available and extensible
    configuration of VAX computers that operate as a single
    system. To achieve performance in a multicomputer
    environment, a new communications architecture,
    communications hardware, and distributed software were
    jointly designed. The software is a distributed version
    of the VAX\slash VMS operating system that uses a
    distributed lock manager to synchronize access to
    shared resources. The communications hardware includes
    a 70 megabit per second message-oriented interconnect
    and an interconnect port that performs communications
    tasks traditionally handled by software. Performance
    measurements show this structure to be highly
    efficient, for example, capable of sending and
    receiving 3000 messages per second on a VAX-11\slash
    780.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Digital Equipment Corp, Littleton, MA, USA},
  classification     = {722; 723},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {computer software; computer systems, digital; design;
    Distributed; intersystem communication protocols;
    network protocols; performance; reliability;
    VAXclusters},
  subject            = {{\bf D.4.4} Software, OPERATING SYSTEMS,
    Communications Management. {\bf C.2.5} Computer Systems
    Organization, COMPUTER-COMMUNICATION NETWORKS, Local
    and Wide-Area Networks, Buses. {\bf D.4.3} Software,
    OPERATING SYSTEMS, File Systems Management, Distributed
    file systems. {\bf D.4.5} Software, OPERATING SYSTEMS,
    Reliability, Fault-tolerance. {\bf D.4.8} Software,
    OPERATING SYSTEMS, Performance.},
}
@Article{Fitzgerald:1986:IVM,
  author             = {Fitzgerald, Robert and Rashid, Richard F.},
  title              = {The Integration of Virtual Memory Management and
    Interprocess Communication in {Accent}},
  journal            = j-TOCS,
  volume             = {4},
  number             = {2},
  pages              = {147--177},
  month              = may,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-2/p147-fitzgerald/},
  abstract           = {The integration of virtual memory management and
    interprocess communication in the Accent network
    operating system kernel is examined. The design and
    implementation of the Accent memory management system
    is discussed and its performance, both on a series of
    message-oriented benchmarks and in normal operation,
    is analyzed in detail.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Carnegie-Mellon Univ, Pittsburgh, PA, USA},
  classification     = {722; 723},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {accent kernel; algorithms; computer operating systems;
    data transmission; design; interprocess communication;
    measurement; performance; Storage Allocation; virtual
    memory management},
  subject            = {{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
    Management, Virtual memory. {\bf D.4.4} Software,
    OPERATING SYSTEMS, Communications Management, Message
    sending. {\bf D.4.8} Software, OPERATING SYSTEMS,
    Performance, Measurements. {\bf D.4.8} Software,
    OPERATING SYSTEMS, Performance, Operational analysis.
    {\bf D.4.7} Software, OPERATING SYSTEMS, Organization
    and Design, Distributed systems. {\bf B.1.5} Hardware,
    CONTROL STRUCTURES AND MICROPROGRAMMING, Microcode
    Applications, Firmware support of operating
    systems/instruction sets**.},
}
@Article{Hoyme:1986:TSM,
  author             = {Hoyme, K. P. and Bruell, S. C. and Afshari, P. V. and
    Kain, R. Y.},
  title              = {A Tree-Structured Mean Value Analysis Algorithm},
  journal            = j-TOCS,
  volume             = {4},
  number             = {2},
  pages              = {178--185},
  month              = may,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-2/p178-hoyme/},
  abstract           = {In a recent paper, S. S. Lam and Y. L. Lien described
    an algorithm called tree-convolution that can reduce
    the space and computation time required for evaluating
    sparse multiclass, product-form queueing networks. In
    this paper, we develop an exact algorithm based on mean
    value analysis (MVA) that is the counterpart of the
    tree-convolution algorithm. The order of reduction in
    storage and computation achieved by our new Tree-MVA
    algorithm compared to the standard MVA algorithm is the
    same order of reduction obtained by tree-convolution
    algorithm over that of the standard convolution
    algorithm. Our Tree-MVA algorithm preserves the
    inherent simplicity of MVA based algorithms.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Honeywell Systems \& Research Cent, Minneapolis,
    MN, USA},
  classification     = {723; 921},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {Algorithms; algorithms; computer programming; design;
    mathematical techniques --- Trees; mean value analysis
    algorithm; performance; tree-structured algorithm},
  subject            = {{\bf D.4.8} Software, OPERATING SYSTEMS, Performance,
    Operational analysis. {\bf C.2.1} Computer Systems
    Organization, COMPUTER-COMMUNICATION NETWORKS, Network
    Architecture and Design. {\bf C.4} Computer Systems
    Organization, PERFORMANCE OF SYSTEMS, Modeling
    techniques. {\bf D.4.8} Software, OPERATING SYSTEMS,
    Performance, Modeling and prediction. {\bf D.4.8}
    Software, OPERATING SYSTEMS, Performance, Stochastic
    analysis.},
}
@Article{Barbara:1986:VVA,
  author             = {Barbar{\'a}, Daniel and Garc{\'\i}a-Molina, H{\'e}ctor},
  title              = {The Vulnerability of Vote Assignments},
  journal            = j-TOCS,
  volume             = {4},
  number             = {3},
  pages              = {187--213},
  month              = aug,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Wed Jan 13 18:36:53 MST 1999},
  bibsource          = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-3/p187-barbara/},
  acknowledgement    = ack-nhfb,
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords           = {design; reliability},
  subject            = {{\bf C.2.4} Computer Systems Organization,
    COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
    Network operating systems. {\bf C.2.4} Computer Systems
    Organization, COMPUTER-COMMUNICATION NETWORKS,
    Distributed Systems, Distributed applications. {\bf
    C.4} Computer Systems Organization, PERFORMANCE OF
    SYSTEMS, Reliability, availability, and serviceability.
    {\bf D.4.5} Software, OPERATING SYSTEMS, Reliability,
    Fault-tolerance. {\bf B.1.3} Hardware, CONTROL
    STRUCTURES AND MICROPROGRAMMING, Control Structure
    Reliability, Testing, and Fault-Tolerance**,
    Error-checking**.},
}
@Article{Iyer:1986:MMC,
  author             = {Iyer, R. K. and Rossetti, D. J. and Hsueh, M. C.},
  title              = {Measurement and Modeling of Computer Reliability as
    Affected by System Activity},
  journal            = j-TOCS,
  volume             = {4},
  number             = {3},
  pages              = {214--237},
  month              = aug,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Wed Jan 13 18:36:53 MST 1999},
  bibsource          = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-3/p214-iyer/},
  acknowledgement    = ack-nhfb,
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords           = {measurement; performance; reliability},
  subject            = {{\bf C.4} Computer Systems Organization, PERFORMANCE
    OF SYSTEMS, Measurement techniques. {\bf C.4} Computer
    Systems Organization, PERFORMANCE OF SYSTEMS,
    Reliability, availability, and serviceability.},
}
@Article{Lazowska:1986:FAP,
  author             = {Lazowska, Edward D. and Zahorjan, John and
    Cheriton, David R. and Zwaenepoel, Willy},
  title              = {File Access Performance of Diskless Workstations},
  journal            = j-TOCS,
  volume             = {4},
  number             = {3},
  pages              = {238--268},
  month              = aug,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Wed Jan 13 18:36:53 MST 1999},
  bibsource          = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-3/p238-lazowska/},
  acknowledgement    = ack-nhfb,
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords           = {design; measurement; performance},
  subject            = {{\bf C.4} Computer Systems Organization, PERFORMANCE
    OF SYSTEMS, Design studies. {\bf D.4.7} Software,
    OPERATING SYSTEMS, Organization and Design, Distributed
    systems. {\bf D.4.8} Software, OPERATING SYSTEMS,
    Performance, Measurements. {\bf D.4.8} Software,
    OPERATING SYSTEMS, Performance, Modeling and
    prediction. {\bf C.2.4} Computer Systems Organization,
    COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
    Network operating systems.},
}
@Article{Archibald:1986:CCP,
  author             = {Archibald, James and Baer, Jean-Loup},
  title              = {Cache Coherence Protocols: Evaluation Using a
    Multiprocessor Simulation Model},
  journal            = j-TOCS,
  volume             = {4},
  number             = {4},
  pages              = {273--298},
  month              = nov,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-4/p273-archibald/},
  abstract           = {Using simulation, we examine the efficiency of several
    distributed, hardware-based solutions to the cache
    coherence problem in shared-bus multiprocessors. For
    each of the approaches, the associated protocol is
    outlined. The simulation model is described, and
    results from that model are presented. The magnitude of
    the potential performance difference between the
    various approaches indicates that the choice of
    coherence solution is very important in the design of
    an efficient shared-bus multiprocessor, since it may
    limit the number of processors in the system.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Univ of Washington, Seattle, WA, USA},
  classification     = {722; 723},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {cache coherence protocols; computer simulation;
    computer systems, digital; design; measurement;
    Multiprocessing; performance; shared-bus
    multiprocessor},
  subject            = {{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
    Styles, Cache memories. {\bf C.1.2} Computer Systems
    Organization, PROCESSOR ARCHITECTURES, Multiple Data
    Stream Architectures (Multiprocessors),
    Multiple-instruction-stream, multiple-data-stream
    processors (MIMD). {\bf C.4} Computer Systems
    Organization, PERFORMANCE OF SYSTEMS, Measurement
    techniques. {\bf C.4} Computer Systems Organization,
    PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf
    D.4.2} Software, OPERATING SYSTEMS, Storage Management,
    Distributed memories.},
}
@Article{Comer:1986:CBM,
  author             = {Comer, Douglas E. and Peterson, Larry L.},
  title              = {Conversation-Based Mail},
  journal            = j-TOCS,
  volume             = {4},
  number             = {4},
  pages              = {299--319},
  month              = nov,
  year               = {1986},
  CODEN              = {ACSYEC},
  ISSN               = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L             = {0734-2071},
  bibdate            = {Thu Jan 14 06:47:30 MST 1999},
  bibsource          = {Compendex database;
    http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  URL                = {http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-4/p299-comer/},
  abstract           = {A new message communication paradigm based on
    conversations that provides an alternative to memo- and
    conference-based mail is described. A
    conversation-based message system groups messages into
    conversations, and orders messages within a
    conversation according to the context in which they
    were written. The message context relation leads to an
    efficient implementation of conversations in a
    distributed environment and supports a natural ordering
    of messages when viewed by the user. Experience with a
    prototype demonstrates the workability of
    conversation-based mail and suggests that conversations
    provide a powerful tool for message communication.},
  acknowledgement    = ack-nhfb,
  affiliationaddress = {Purdue Univ, West Lafayette, IN, USA},
  classification     = {722; 723},
  journal-URL        = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  journalabr         = {ACM Trans Comput Syst},
  keywords           = {computer systems, digital; conversation-based mail;
    design; Distributed; electronic mail; human factors;
    management; message systems},
  subject            = {{\bf H.4.3} Information Systems, INFORMATION SYSTEMS
    APPLICATIONS, Communications Applications, Electronic
    mail. {\bf H.4.3} Information Systems, INFORMATION
    SYSTEMS APPLICATIONS, Communications Applications,
    Computer conferencing, teleconferencing, and
    videoconferencing.},
}
@Article{Badal:1986:DDD,
  author =       "D. Z. Badal",
  title =        "The Distributed Deadlock Detection Algorithm",
  journal =      j-TOCS,
  volume =       "4",
  number =       "4",
  pages =        "320--337",
  month =        nov,
  year =         "1986",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-4/p320-badal/",
  abstract =     "We propose a distributed deadlock detection algorithm
                 for distributed computer systems. We consider two types
                 of resources, depending on whether the remote resource
                 lock granularity and mode can or cannot be determined
                 without access to the remote resource site. We present
                 the algorithm, its performance analysis, and an
                 informal argument about its correctness. The proposed
                 algorithm has a hierarchical design intended to detect
                 the most frequent deadlocks with maximum efficiency.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Hewlett-Packard Lab, Palo Alto, CA, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; computer programming --- Algorithms;
                 computer systems, digital; deadlock detection; design;
                 Distributed; distributed algorithms; message
                 communication systems; performance",
  subject =      "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management, Deadlocks. {\bf H.2.2} Information Systems,
                 DATABASE MANAGEMENT, Physical Design, Deadlock
                 avoidance. {\bf C.2.4} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
                 Network operating systems.",
}
@Article{Carey:1986:PMC,
  author =       "Michael J. Carey and Waleed A. Muhanna",
  title =        "The Performance of Multiversion Concurrency Control
                 Algorithms",
  journal =      j-TOCS,
  volume =       "4",
  number =       "4",
  pages =        "338--378",
  month =        nov,
  year =         "1986",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1986-4-4/p338-carey/",
  abstract =     "This paper describes a simulation study of the
                 performance of several multiversion concurrency control
                 algorithms, investigating the extent to which they
                 provide increases in the level of concurrency and also
                 the CPU, I/O, and storage costs resulting from the use
                 of multiple versions. The algorithms are compared with
                 regard to performance with their single-version
                 counterparts and with each other. It is shown that each
                 algorithm offers significant performance improvements
                 despite the additional disk accesses involved in
                 accessing old versions of data; the nature of the
                 improvement depends on the algorithm in question. It is
                 also shown that the storage overhead for maintaining
                 old versions that may be required by ongoing
                 transactions is not all that large under most
                 circumstances.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Univ of Wisconsin, Madison, WI, USA",
  classification = "723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; computer programming --- Algorithms;
                 database systems; deadlock avoidance; design;
                 Distributed; experimentation; multiversion concurrency
                 control algorithms; performance; transaction
                 processing",
  subject =      "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance,
                 Simulation. {\bf H.2.2} Information Systems, DATABASE
                 MANAGEMENT, Physical Design, Deadlock avoidance. {\bf
                 H.2.2} Information Systems, DATABASE MANAGEMENT,
                 Physical Design, Recovery and restart. {\bf H.2.4}
                 Information Systems, DATABASE MANAGEMENT, Systems,
                 Transaction processing.",
}
@Article{Lamport:1987:FME,
  author =       "Leslie Lamport",
  title =        "A Fast Mutual Exclusion Algorithm",
  journal =      j-TOCS,
  volume =       "5",
  number =       "1",
  pages =        "1--11",
  month =        feb,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-1/p1-lamport/",
  abstract =     "A new solution to the mutual exclusion problem is
                 presented that, in the absence of contention, requires
                 only seven memory accesses. It assumes atomic reads and
                 atomic writes to shared registers.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Digital Equipment Corp, Palo Alto, CA, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; computer programming --- Algorithms;
                 computer systems, digital; memory accesses;
                 Multiprocessing; mutual exclusion algorithm",
  subject =      "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management, Mutual exclusion. {\bf D.4.1} Software,
                 OPERATING SYSTEMS, Process Management, Deadlocks. {\bf
                 B.3.2} Hardware, MEMORY STRUCTURES, Design Styles,
                 Shared memory.",
}
@Article{Cheriton:1987:UUS,
  author =       "David R. Cheriton",
  title =        "{UIO}: a {Uniform I/O} System Interface for
                 Distributed Systems",
  journal =      j-TOCS,
  volume =       "5",
  number =       "1",
  pages =        "12--46",
  month =        feb,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-1/p12-cheriton/",
  abstract =     "The UIO (uniform I/O) system interface that has been
                 used for the past five years in the V distributed
                 operating system is described, with the focus on the
                 key design issues. This interface provides several
                 extensions beyond the I/O interface of UNIX, including
                 support for record I/O, locking, atomic transactions,
                 and replication, as well as attributes that indicate
                 whether optional semantics and operations are
                 available. Experience in using and implementing this
                 interface with a variety of different I/O services is
                 described, along with the performance of both local and
                 network I/O. It is concluded that the UIO interface
                 provides a uniform I/O system interface with
                 significant functionality, wide applicability, and no
                 significant performance penalty.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Stanford Univ, Stanford, CA, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "computer interfaces; computer operating systems;
                 computer systems, digital --- Distributed; design;
                 experimentation; files input/output; interprocess
                 communication; performance; remote procedure call;
                 standardization; uniform I/O interface",
  subject =      "{\bf D.4.4} Software, OPERATING SYSTEMS,
                 Communications Management, Input/output. {\bf C.2.4}
                 Computer Systems Organization, COMPUTER-COMMUNICATION
                 NETWORKS, Distributed Systems, Network operating
                 systems. {\bf D.4.7} Software, OPERATING SYSTEMS,
                 Organization and Design, Distributed systems. {\bf
                 C.2.0} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, General, Security and
                 protection (e.g., firewalls).",
}
@Article{Birman:1987:RCP,
  author =       "Kenneth P. Birman and Thomas A. Joseph",
  title =        "Reliable Communication in the Presence of Failures",
  journal =      j-TOCS,
  volume =       "5",
  number =       "1",
  pages =        "47--76",
  month =        feb,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-1/p47-birman/",
  abstract =     "The design and correctness of a communication facility
                 for a distributed computer system are reported on. The
                 facility provides support for fault-tolerant process
                 groups in the form of a family of reliable multicast
                 protocols that can be used in both local- and wide-area
                 networks. These protocols attain high levels of
                 concurrency, while respecting application-specific
                 delivery ordering constraints, and have varying cost
                 and performance that depend on the degree of ordering
                 desired. In particular, a protocol that enforces causal
                 delivery orderings is introduced and shown to be a
                 valuable alternative to conventional asynchronous
                 communication protocols. The facility also ensures that
                 the processes belonging to a fault-tolerant process
                 group will observe consistent orderings of events
                 affecting the group as a whole.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Cornell Univ, Ithaca, NY, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "computer networks --- Protocols; computer systems,
                 digital; Distributed; fault tolerance; multicast
                 protocols; performance; reliability",
  subject =      "{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability,
                 Fault-tolerance. {\bf C.2.4} Computer Systems
                 Organization, COMPUTER-COMMUNICATION NETWORKS,
                 Distributed Systems, Distributed applications. {\bf
                 C.2.4} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
                 Distributed databases. {\bf D.4.1} Software, OPERATING
                 SYSTEMS, Process Management, Concurrency. {\bf D.4.1}
                 Software, OPERATING SYSTEMS, Process Management,
                 Synchronization. {\bf H.2.4} Information Systems,
                 DATABASE MANAGEMENT, Systems, Distributed databases.
                 {\bf H.2.2} Information Systems, DATABASE MANAGEMENT,
                 Physical Design, Recovery and restart. {\bf H.2.4}
                 Information Systems, DATABASE MANAGEMENT, Systems,
                 Concurrency. {\bf D.4.4} Software, OPERATING SYSTEMS,
                 Communications Management, Network communication. {\bf
                 C.2.1} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Network Architecture
                 and Design, Network communications.",
}
@Article{Geist:1987:CDS,
  author =       "Robert Geist and Stephen Daniel",
  title =        "A Continuum of Disk Scheduling Algorithms",
  journal =      j-TOCS,
  volume =       "5",
  number =       "1",
  pages =        "77--92",
  month =        feb,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-1/p77-geist/",
  abstract =     "A continuum of disk scheduling algorithms, V(R),
                 having endpoints V(0) equals SSTF and V(1) equals SCAN,
                 is defined. V(R) maintains a current SCAN direction (in
                 or out) and services next the request with the smallest
                 effective distance. The effective distance of a request
                 that lies in the current direction is its physical
                 distance (in cylinders) from the read\slash write head.
                 The effective distance of a request in the opposite
                 direction is its physical distance plus R multiplied by
                 (total number of cylinders on the disk). By use of
                 simulation methods, it is shown that this definitional
                 continuum also provides a continuum in performance,
                 both with respect to the mean and with respect to the
                 standard deviation of request waiting time. For
                 objective functions that are linear combinations of the
                 two measures, $ \mu_w + k \sigma_w $, intermediate
                 points of the continuum are seen to provide performance
                 uniformly superior to both SSTF and SCAN. A method of
                 implementing V(R) and the results of its experimental
                 use in a real system are presented.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Clemson Univ, Clemson, SC, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; computer operating systems; computer
                 programming --- Algorithms; computer simulation;
                 computer systems, digital; disk scheduling algorithms;
                 measurement; moving-head disk; performance;
                 Scheduling",
  subject =      "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management, Scheduling. {\bf D.4.4} Software, OPERATING
                 SYSTEMS, Communications Management, Input/output. {\bf
                 D.4.8} Software, OPERATING SYSTEMS, Performance,
                 Measurements. {\bf D.4.8} Software, OPERATING SYSTEMS,
                 Performance, Modeling and prediction. {\bf D.4.8}
                 Software, OPERATING SYSTEMS, Performance, Simulation.",
}
@Article{Smith:1987:RDC,
  author =       "Alan Jay Smith",
  title =        "Remark on {``Disk Cache --- Miss Ratio Analysis and
                 Design Consideration''}",
  journal =      j-TOCS,
  volume =       "5",
  number =       "1",
  pages =        "93--93",
  month =        feb,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-1/p93-smith/",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design",
  subject =      "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
                 Management, Secondary storage. {\bf B.3.2} Hardware,
                 MEMORY STRUCTURES, Design Styles, Cache memories.",
}
@Article{Watson:1987:GET,
  author =       "Richard W. Watson and Sandy A. Mamrak",
  title =        "Gaining Efficiency in Transport Services by
                 Appropriate Design and Implementation Choices",
  journal =      j-TOCS,
  volume =       "5",
  number =       "2",
  pages =        "97--120",
  month =        may,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-2/p97-watson/",
  abstract =     "This paper examines transport protocol mechanisms and
                 implementation issues and argues that general-purpose
                 transport protocols can be effective in a wide range of
                 distributed applications because (1) many of the
                 mechanisms used in the special-purpose protocols can
                 also be used in general-purpose protocol designs and
                 implementations, (2) special-purpose designs have
                 hidden costs, and (3) very special operating system
                 environments, overall system loads, application
                 response times, and interaction patterns are required
                 before general-purpose protocols are the main system
                 performance bottlenecks.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Lawrence Livermore Natl Lab, Livermore, CA,
                 USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "computer networks --- Protocols; computer systems,
                 digital; design; Distributed; economics; interprocess
                 communication; performance; standardization; transport
                 layer protocols; transport services",
  subject =      "{\bf C.2.2} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
                 Protocol architecture.",
}
@Article{Joyce:1987:MDS,
  author =       "Jeffrey Joyce and Greg Lomow and Konrad Slind and
                 Brian Unger",
  title =        "Monitoring Distributed Systems",
  journal =      j-TOCS,
  volume =       "5",
  number =       "2",
  pages =        "121--150",
  month =        may,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-2/p121-joyce/",
  abstract =     "The monitoring of distributed systems involves the
                 collection, interpretation, and display of information
                 concerning the interactions among concurrently
                 executing processes. This information and its display
                 can support the debugging, testing, performance
                 evaluation, and dynamic documentation of distributed
                 systems. General problems associated with monitoring
                 are outlined in this paper, and the architecture of a
                 general purpose, extensible, distributed monitoring
                 system is presented. Three approaches to the display of
                 process interactions are described: textual traces,
                 animated graphical traces, and a combination of aspects
                 of the textual and graphical approaches. The roles that
                 each of these approaches fulfills in monitoring and
                 debugging distributed systems are identified and
                 compared. Monitoring tools for collecting communication
                 statistics, detecting deadlock, controlling the
                 nondeterministic execution of distributed systems, and
                 for using protocol specifications in monitoring are
                 also described.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Univ of Calgary, Calgary, Alberta, Can",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "computer software --- Monitoring; computer systems,
                 digital; concurrent monitoring; deadlock; design;
                 Distributed; distributed monitoring; dynamic
                 documentation; human factors; measurement; protocol
                 specifications",
  subject =      "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance,
                 Monitors. {\bf D.2.2} Software, SOFTWARE ENGINEERING,
                 Design Tools and Techniques, User interfaces. {\bf
                 D.2.5} Software, SOFTWARE ENGINEERING, Testing and
                 Debugging. {\bf D.2.4} Software, SOFTWARE ENGINEERING,
                 Software/Program Verification, Assertion checkers. {\bf
                 D.2.7} Software, SOFTWARE ENGINEERING, Distribution,
                 Maintenance, and Enhancement, Documentation. {\bf
                 D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent
                 Programming. {\bf C.2.4} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
                 Distributed applications. {\bf D.2.6} Software,
                 SOFTWARE ENGINEERING, Programming Environments.",
}
@Article{Glasgow:1987:DPF,
  author =       "Janice I. Glasgow and Glenn H. MacEwen",
  title =        "The Development and Proof of a Formal Specification
                 for a Multilevel Secure System",
  journal =      j-TOCS,
  volume =       "5",
  number =       "2",
  pages =        "151--184",
  month =        may,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-2/p151-glasgow/",
  abstract =     "This paper describes current work on the design and
                 specification of a multilevel secure distributed system
                 called SNet. It discusses security models in general,
                 the various problems of information flows in SNet, and
                 the abstract and concrete security model components for
                 SNet. It also introduces Lucid as a language for
                 specifying distributed systems. The model components
                 are expressed in Lucid; these Lucid partial
                 specifications are shown to be correct with respect to
                 the formal model, and the two model components are
                 shown to be consistent.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Queen's Univ, Kingston, Ont, Can",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "computer operating systems; computer programming
                 languages; computer systems, digital; data processing
                 --- Security of Data; Distributed; formal
                 specification; lucid; multilevel secure system;
                 security; SNet; verification",
  subject =      "{\bf C.2.0} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, General, Security and
                 protection (e.g., firewalls). {\bf D.3.2} Software,
                 PROGRAMMING LANGUAGES, Language Classifications, LUCID.
                 {\bf D.4.6} Software, OPERATING SYSTEMS, Security and
                 Protection, Information flow controls. {\bf C.2.4}
                 Computer Systems Organization, COMPUTER-COMMUNICATION
                 NETWORKS, Distributed Systems, SNet.",
}
@Article{Schwan:1987:HPO,
  author =       "Karsten Schwan and Tom Bihari and Bruce W. Weide and
                 Gregor Taulbee",
  title =        "High-Performance Operating System Primitives for
                 Robotics and Real-Time Control Systems",
  journal =      j-TOCS,
  volume =       "5",
  number =       "3",
  pages =        "189--231",
  month =        aug,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-3/p189-schwan/",
  abstract =     "The Generalized Executive for real-time Multiprocessor
                 applications (GEM) is an operating system that
                 addresses several requirements of operating software.
                 First, when using GEM, programmers can select one of
                 two different types of tasks differing in size, called
                 processes and microprocesses. Second, the scheduling
                 calls offered by GEM permit the implementation of
                 several models of task interaction. Third, GEM supports
                 multiple models of communication with a parameterized
                 communication mechanism. Fourth, GEM is closely coupled
                 to prototype real-time programming environments that
                 provide programming support for the models of
                 computation offered by the operating system. GEM is
                 being used on a multiprocessor with robotics
                 application software of substantial size and
                 complexity.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Ohio State Univ, Columbus, OH, USA",
  classification = "723; 731",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "computer operating systems; computer systems, digital
                 --- Multiprocessing; control systems --- Computer
                 Applications; generalized executive for real-time
                 multiprocessor applications; high-performance operating
                 system primitives; real-time control systems;
                 robotics",
  subject =      "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization
                 and Design, Real-time systems and embedded systems.
                 {\bf C.3} Computer Systems Organization,
                 SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS, Process
                 control systems. {\bf C.3} Computer Systems
                 Organization, SPECIAL-PURPOSE AND APPLICATION-BASED
                 SYSTEMS, Real-time and embedded systems. {\bf C.4}
                 Computer Systems Organization, PERFORMANCE OF SYSTEMS,
                 Design studies. {\bf D.4.1} Software, OPERATING
                 SYSTEMS, Process Management,
                 Multiprocessing/multiprogramming/multitasking. {\bf
                 D.4.1} Software, OPERATING SYSTEMS, Process Management,
                 Scheduling. {\bf D.4.4} Software, OPERATING SYSTEMS,
                 Communications Management, Message sending. {\bf D.4.8}
                 Software, OPERATING SYSTEMS, Performance, Measurements.
                 {\bf J.7} Computer Applications, COMPUTERS IN OTHER
                 SYSTEMS, Industrial control. {\bf J.7} Computer
                 Applications, COMPUTERS IN OTHER SYSTEMS, Process
                 control. {\bf J.7} Computer Applications, COMPUTERS IN
                 OTHER SYSTEMS, Real time. {\bf D.4.0} Software,
                 OPERATING SYSTEMS, General.",
}
@Article{Harter:1987:RTL,
  author =       "Paul K. {Harter, Jr.}",
  title =        "Response Times in Level-Structured Systems",
  journal =      j-TOCS,
  volume =       "5",
  number =       "3",
  pages =        "232--248",
  month =        aug,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-3/p232-harter/",
  abstract =     "Real-time programs are among the most critical
                 programs in use today, yet they are also among the
                 worst understood and the most difficult to verify.
                 Validation of real-time systems is nonetheless
                 extremely important in view of the high costs
                 associated with failure in typical application areas.
                 We present here a method for deriving response-time
                 properties in complex systems with a level structure
                 based on priority. The method involves a level-by-level
                 examination of the system, in which information
                 distilled from each successive level is used to adjust
                 the results for later levels. The results obtained at
                 each level of the system are not affected by later
                 analyses, which obviates having to consider a complex
                 system as a whole.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Univ of Colorado, Boulder, CO, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "computer software --- Reliability; computer systems,
                 digital; design; level-structured systems; performance;
                 Performance; real-time systems; reliability; response
                 times; verification",
  subject =      "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance,
                 Modeling and prediction. {\bf D.4.7} Software,
                 OPERATING SYSTEMS, Organization and Design,
                 Hierarchical design**. {\bf D.4.7} Software, OPERATING
                 SYSTEMS, Organization and Design, Real-time systems and
                 embedded systems. {\bf J.7} Computer Applications,
                 COMPUTERS IN OTHER SYSTEMS, Industrial control. {\bf
                 J.7} Computer Applications, COMPUTERS IN OTHER SYSTEMS,
                 Process control. {\bf J.7} Computer Applications,
                 COMPUTERS IN OTHER SYSTEMS, Real time. {\bf D.2.4}
                 Software, SOFTWARE ENGINEERING, Software/Program
                 Verification, Validation.",
}
@Article{Herlihy:1987:CVA,
  author =       "Maurice Herlihy",
  title =        "Concurrency Versus Availability: Atomicity Mechanisms
                 for Replicated Data",
  journal =      j-TOCS,
  volume =       "5",
  number =       "3",
  pages =        "249--274",
  month =        aug,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-3/p249-herlihy/",
  abstract =     "A replicated object is a typed data object that is
                 stored redundantly at multiple locations to enhance
                 availability. Most techniques for managing replicated
                 data have a two-level structure: At the higher level, a
                 replica-control protocol reconstructs the object's
                 state from its distributed components, and at the lower
                 level, a standard concurrency-control protocol
                 synchronizes accesses to the individual components.
                 This paper explores an alternative approach to managing
                 replicated data by presenting two replication methods
                 in which concurrency control and replica management are
                 handled by a single integrated protocol. These
                 integrated protocols permit more concurrency than
                 independent protocols, and they allow availability and
                 concurrency to be traded off: Constraints on
                 concurrency may be relaxed if constraints on
                 availability are tightened, and vice versa. In general,
                 constraints on concurrency and availability cannot be
                 minimized simultaneously.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Carnegie-Mellon Univ, Pittsburgh, PA, USA",
  classification = "723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "atomicity mechanisms; computer programming ---
                 Algorithms; database systems; replicated data",
  subject =      "{\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language
                 Constructs and Features. {\bf D.4.3} Software,
                 OPERATING SYSTEMS, File Systems Management. {\bf D.4.5}
                 Software, OPERATING SYSTEMS, Reliability. {\bf H.2.4}
                 Information Systems, DATABASE MANAGEMENT, Systems. {\bf
                 H.2.4} Information Systems, DATABASE MANAGEMENT,
                 Systems, Distributed databases. {\bf H.2.4} Information
                 Systems, DATABASE MANAGEMENT, Systems, Transaction
                 processing. {\bf D.4.1} Software, OPERATING SYSTEMS,
                 Process Management, Concurrency.",
}
@Article{Kirkman:1987:OCP,
  author =       "W. Worth Kirkman",
  title =        "An Optimized Contention Protocol for Broadband
                 Networks",
  journal =      j-TOCS,
  volume =       "5",
  number =       "3",
  pages =        "275--283",
  month =        aug,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-3/p275-kirkman/",
  abstract =     "This paper describes the concepts underlying an
                 alternative link-level protocol for broadband local
                 networks. The protocol uses implicit slotting of the
                 contention channel to support larger networks, improve
                 performance, and provide reliable distributed collision
                 recognition without reinforcement. It is designed such
                 that compatible interfaces to existing CSMA\slash
                 CD-based systems can be provided.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "MITRE Corp, McLean, VA, USA",
  classification = "723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; broadband networks; carrier sense multiple
                 access/collision detection network; computer networks;
                 CSMA/CD-based systems; data transmission; local
                 networks; optimized contention protocol; performance;
                 Protocols",
  subject =      "{\bf C.2.2} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Network Protocols.
                 {\bf C.2.1} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Network Architecture
                 and Design, Packet-switching networks.",
}
@Article{Sanders:1987:ISD,
  author =       "Beverly A. Sanders",
  title =        "The Information Structure of Distributed Mutual
                 Exclusion Algorithms",
  journal =      j-TOCS,
  volume =       "5",
  number =       "3",
  pages =        "284--299",
  month =        aug,
  year =         "1987",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-3/p284-sanders/",
  abstract =     "The concept of an information structure is introduced
                 as a unifying principle behind several of the numerous
                 algorithms that have been proposed for the distributed
                 mutual exclusion problem. This approach allows the
                 development of a generalized mutual exclusion algorithm
                 that accepts a particular information structure at
                 initialization and realizes both known and new
                 algorithms as special cases. Two simple performance
                 metrics of a realized algorithm can be obtained
                 directly from the information structure. A new failure
                 recovery mechanism called local recovery, which
                 requires no coordination between nodes and no
                 additional messages beyond that needed for failure
                 detection, is introduced.",
  acknowledgement = ack-nhfb,
  affiliationaddress = "Univ of Maryland, USA",
  classification = "722; 723",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  journalabr =   "ACM Trans Comput Syst",
  keywords =     "algorithms; computer programming --- Algorithms;
                 computer systems, digital; design; Distributed;
                 distributed mutual exclusion algorithms; failure
                 recovery; local recovery; performance; reliability;
                 theory",
  subject =      "{\bf C.2.4} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
                 Network operating systems. {\bf C.4} Computer Systems
                 Organization, PERFORMANCE OF SYSTEMS, Reliability,
                 availability, and serviceability. {\bf D.4.1} Software,
                 OPERATING SYSTEMS, Process Management, Mutual
                 exclusion. {\bf D.4.5} Software, OPERATING SYSTEMS,
                 Reliability, Fault-tolerance.",
}
@Article{Thiebaut:1987:FC,
author = "Dominique Thiebaut and Harold S. Stone",
title = "Footprints in the Cache",
journal = j-TOCS,
volume = "5",
number = "4",
pages = "305--329",
month = nov,
year = "1987",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-4/p305-thiebaut/",
abstract = "This paper develops an analytical model for
cache-reload transients and compares the model to
observations based on several address traces. The
cache-reload transient is the set of cache misses that
occur when a process is reinitiated after being
suspended temporarily. For example, an interrupt
program that runs periodically experiences a reload
transient at each initiation. The reload transient
depends on the cache size and on the sizes of the
footprints in the cache of the competing programs,
where a program footprint is defined to be the set of
lines in the cache in active use by the program. The
model shows that the size of the transient is related
to the normal distribution function. A simulation based
on program-address traces shows excellent agreement
between the model and the observations.",
acknowledgement = ack-nhfb,
affiliationaddress = "Univ of Massachusetts, MA, USA",
classification = "722; 723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "address traces; cache-reload transients; computer
architecture; computer operating systems --- Storage
Allocation; data storage units; design;
experimentation; memory structures; performance;
program footprint; theory; trace-driven simulation",
subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
Styles, Cache memories. {\bf B.3.3} Hardware, MEMORY
STRUCTURES, Performance Analysis and Design Aids**,
Formal models**. {\bf B.3.3} Hardware, MEMORY
STRUCTURES, Performance Analysis and Design Aids**,
Simulation**. {\bf C.4} Computer Systems Organization,
PERFORMANCE OF SYSTEMS, Modeling techniques. {\bf
D.4.1} Software, OPERATING SYSTEMS, Process Management,
Multiprocessing/multiprogramming/multitasking. {\bf
D.4.2} Software, OPERATING SYSTEMS, Storage Management,
Swapping**.",
}
@Article{Falcone:1987:PIL,
author = "Joseph R. Falcone",
title = "A Programmable Interface Language for Heterogeneous
Distributed Systems",
journal = j-TOCS,
volume = "5",
number = "4",
pages = "330--351",
month = nov,
year = "1987",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-4/p330-falcone/",
abstract = "The performance requirements of systems of
personal-computer workstations place a strain on
traditional approaches to network architecture. The
integration of diverse systems into this environment
introduces functional compatibility issues that are not
present in homogeneous networks. This paper proposes a
distributed system architecture in which communication
follows a programming paradigm. In this architecture a
programming language provides remote service interfaces
for the heterogeneous distributed system environment.
This language is a flexible and efficient medium for
implementing service function protocols. In essence,
clients and servers communicate by programming one
another.",
acknowledgement = ack-nhfb,
affiliationaddress = "Digital Equipment Corp",
classification = "722; 723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "computer interfaces; computer networks --- Local
Networks; computer programming languages; computer
systems, digital --- Distributed; computers, personal;
heterogeneous distributed systems; personal computer
workstation networks; programmable interface language",
}
@Article{Koch:1987:DFA,
author = "Philip D. L. Koch",
title = "Disk File Allocation Based on the Buddy System",
journal = j-TOCS,
volume = "5",
number = "4",
pages = "352--370",
month = nov,
year = "1987",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-4/p352-koch/",
abstract = "A variant of the binary buddy system that reduces
fragmentation is described. Files are allocated on up
to t extents, and inoptimally allocated files are
periodically reallocated. The Dartmouth Time-Sharing
System (DTSS) uses this method. Several installations,
representing different classes of workload, are studied
to measure the method's performance. The results
indicate that compared to the file layout method used
by UNIX, the buddy system results in more efficient
access but less efficient utilization of disk space. As
disks become larger and less expensive per byte,
strategies that achieve efficient I/O throughput at the
expense of some storage loss become increasingly
attractive.",
acknowledgement = ack-nhfb,
affiliationaddress = "Dartmouth Coll, USA",
classification = "723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "buddy system; computer operating systems; data
processing --- File Organization; disk file allocation;
dynamic memory management; dynamic storage allocation;
file system design; measurement; performance; Storage
Allocation",
subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
Management, File organization. {\bf D.4.3} Software,
OPERATING SYSTEMS, File Systems Management, Access
methods. {\bf D.4.2} Software, OPERATING SYSTEMS,
Storage Management, Allocation/deallocation strategies.
{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
Management, Secondary storage. {\bf D.4.8} Software,
OPERATING SYSTEMS, Performance, Measurements. {\bf
H.3.2} Information Systems, INFORMATION STORAGE AND
RETRIEVAL, Information Storage, File organization. {\bf
E.5} Data, FILES, Organization/structure.",
}
@Article{Herzberg:1987:PPS,
author = "Amir Herzberg and Shlomit S. Pinter",
title = "Public Protection of Software",
journal = j-TOCS,
volume = "5",
number = "4",
pages = "371--393",
month = nov,
year = "1987",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-4/p371-herzberg/",
abstract = "One of the overwhelming problems that software
producers must contend with is the unauthorized use and
distribution of their products. Copyright laws
concerning software are rarely enforced, thereby
causing major losses to the software companies.
Technical means of protecting software from illegal
duplication are required, but the available means are
imperfect. We present protocols that enable software
protection, without causing substantial overhead in
distribution and maintenance. The protocols may be
implemented by a conventional cryptosystem, such as the
DES, or by a public key cryptosystem, such as the RSA.
Both implementations are proved to satisfy required
security criteria.",
acknowledgement = ack-nhfb,
affiliationaddress = "Technion-Israel Inst of Technology, Isr",
classification = "723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "algorithms; computer software; cryptographic
protocols; cryptography; design; Protection; public key
cryptosystems; security; security protocols; single key
cryptosystems",
subject = "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and
Protection, Cryptographic controls. {\bf K.5.1}
Computing Milieux, LEGAL ASPECTS OF COMPUTING,
Hardware/Software Protection. {\bf E.3} Data, DATA
ENCRYPTION, Public key cryptosystems. {\bf D.4.6}
Software, OPERATING SYSTEMS, Security and Protection.",
}
@Article{Babaoglu:1987:RCB,
author = "{\"O}zalp Babao{\u{g}}lu",
title = "On the Reliability of Consensus-Based Fault-Tolerant
Distributed Computing Systems",
journal = j-TOCS,
volume = "5",
number = "4",
pages = "394--416",
month = nov,
year = "1987",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1987-5-4/p394-babaoglu/",
abstract = "Using a stochastic model of processor failure times,
we investigate design choices such as replication
level, protocol running time, randomized versus
deterministic protocols, fault detection, and
authentication. We use the probability with which a
system produces the correct output as our evaluation
criterion. This contrasts with previous fault-tolerance
results that guarantee correctness only if the
percentage of faulty processors in the system can be
bounded. Our results reveal some subtle and
counterintuitive interactions between the design
parameters and system reliability.",
acknowledgement = ack-nhfb,
affiliationaddress = "Cornell Univ, USA",
classification = "722; 723; 913",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "Byzantine agreement; computer systems, digital;
design; deterministic protocols; distributed consensus;
Fault Tolerant Capability; fault-tolerant distributed
system; performance; randomized protocols;
reliability",
subject = "{\bf B.1.3} Hardware, CONTROL STRUCTURES AND
MICROPROGRAMMING, Control Structure Reliability,
Testing, and Fault-Tolerance**, Redundant design**.
{\bf B.3.4} Hardware, MEMORY STRUCTURES, Reliability,
Testing, and Fault-Tolerance**, Redundant design**.
{\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.
{\bf C.4} Computer Systems Organization, PERFORMANCE OF
SYSTEMS, Reliability, availability, and
serviceability.",
}
@Article{Jones:1988:PSI,
author = "Anita K. Jones",
title = "Preface: Special Issue on Operating Systems
Principles",
journal = j-TOCS,
volume = "6",
number = "1",
pages = "1--2",
month = feb,
year = "1988",
bibdate = "Thu Jan 14 11:09:14 1999",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Terry:1988:MSV,
author = "Douglas B. Terry and Daniel C. Swinehart",
title = "Managing Stored Voice in the {Etherphone} System",
journal = j-TOCS,
volume = "6",
number = "1",
pages = "3--27",
month = feb,
year = "1988",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p3-terry/",
abstract = "The voice manager in the Etherphone system provides
facilities for recording, editing, and playing stored
voice in a distributed personal-computing environment.
To facilitate sharing, the voice manager stores voice
on a special voice file server that is accessible via
the local internet. Operations for editing a passage of
recorded voice simply build persistent data structures
to represent the edited voice. These data structures,
implementing an abstraction called voice ropes, are
stored in a server database and consist of lists of
intervals within voice files. Clients refer to voice
ropes solely by reference. Interests, additional
persistent data structures maintained by the server,
provide a sort of directory service for managing the
voice ropes that have been created as well as a
reliable reference-counting mechanism, permitting the
garbage collection of voice ropes that are no longer
needed.",
acknowledgement = ack-nhfb,
affiliationaddress = "XEROX Palo Alto Research Cent, Palo Alto, CA,
USA",
classification = "718; 723",
conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems
Principles.",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "computer networks; computer operating systems;
computer systems, digital --- Distributed; data
processing --- Data Structures; design; digital
communication systems; electronic mail; Etherphone
system; management; performance; security; voice
editing; voice file server; voice manager; Voice/Data
Integrated Services",
sponsor = "ACM, Special Interest Group on Operating Systems, New
York, NY, USA",
subject = "{\bf H.4.3} Information Systems, INFORMATION SYSTEMS
APPLICATIONS, Communications Applications. {\bf D.4.2}
Software, OPERATING SYSTEMS, Storage Management,
Allocation/deallocation strategies. {\bf D.4.2}
Software, OPERATING SYSTEMS, Storage Management,
Storage hierarchies. {\bf D.4.3} Software, OPERATING
SYSTEMS, File Systems Management. {\bf D.4.6} Software,
OPERATING SYSTEMS, Security and Protection, Access
controls. {\bf D.4.6} Software, OPERATING SYSTEMS,
Security and Protection, Cryptographic controls. {\bf
E.2} Data, DATA STORAGE REPRESENTATIONS. {\bf H.2.8}
Information Systems, DATABASE MANAGEMENT, Database
Applications. {\bf H.4.3} Information Systems,
INFORMATION SYSTEMS APPLICATIONS, Communications
Applications, Electronic mail. {\bf C.2.4} Computer
Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems.",
}
@Article{Chang:1988:SAP,
author = "Albert Chang and Mark F. Mergen",
title = "801 Storage: Architecture and Programming",
journal = j-TOCS,
volume = "6",
number = "1",
pages = "28--50",
month = feb,
year = "1988",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p28-chang/",
abstract = "Based on novel architecture, the 801 minicomputer
project has developed a low-level storage manager that
can significantly simplify storage programming in
subsystems and applications. The storage manager
embodies three ideas: (1) large virtual storage, to
contain all temporary data and permanent files for the
active programs; (2) the innovation of database
storage, which has implicit properties of access
serializability and atomic update, similar to those of
database transaction systems; and (3) access to all
storage, including files, by the usual operations and
types of a high-level programming language. The IBM RT
PC implements the hardware architecture necessary for
these storage facilities in its storage controller
(MMU). The storage manager and language elements
required, as well as subsystems and applications that
use them, have been implemented and studied in a
prototype operating system called CPR, that runs on the
RT PC. Low cost and good performance are achieved in
both hardware and software. The design is intended to
be extensible across a wide performance\slash cost
spectrum.",
acknowledgement = ack-nhfb,
affiliationaddress = "IBM T. J. Watson Research Cent, Yorktown
Heights, NY, USA",
classification = "723",
conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems
Principles.",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "801 minicomputer project; algorithms; computer
architecture; computer operating systems; computers,
minicomputer; CPR operating system; design;
experimentation; IBM RT PC; low-level storage manager;
performance",
sponsor = "ACM, Special Interest Group on Operating Systems, New
York, NY, USA",
subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
Styles, Virtual memory. {\bf C.1.1} Computer Systems
Organization, PROCESSOR ARCHITECTURES, Single Data
Stream Architectures, RISC. {\bf D.3.3} Software,
PROGRAMMING LANGUAGES, Language Constructs and
Features. {\bf D.4.2} Software, OPERATING SYSTEMS,
Storage Management. {\bf D.4.3} Software, OPERATING
SYSTEMS, File Systems Management. {\bf D.4.7} Software,
OPERATING SYSTEMS, Organization and Design.",
}
@Article{Howard:1988:SPD,
author = "John H. Howard and Michael L. Kazar and Sherri G.
Menees and David A. Nichols and M. Satyanarayanan and
Robert N. Sidebotham and Michael J. West",
title = "Scale and Performance in a Distributed File System",
journal = j-TOCS,
volume = "6",
number = "1",
pages = "51--81",
month = feb,
year = "1988",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p51-howard/",
abstract = "The Andrew File System is a location-transparent
distributed file system that will eventually span more
than 5000 workstations at Carnegie Mellon University.
Large scale affects performance and complicates system
operation. In this paper we present observations of a
prototype implementation, motivate changes in the areas
of cache validation, server process structure, name
translation, and low-level storage representation, and
quantitatively demonstrate Andrew's ability to scale
gracefully. We establish the importance of whole-file
transfer and caching in Andrew by comparing its
performance with that of Sun Microsystem's NFS file
system. We also show how the aggregation of files into
volumes improves the operability of the system.",
acknowledgement = ack-nhfb,
affiliationaddress = "Carnegie Mellon Univ, Pittsburgh, PA, USA",
classification = "723",
conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems
Principles.",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "Andrew File System; computer operating systems;
computer systems, digital --- Distributed; design;
distributed file system; experimentation; file
transfer; measurement; performance",
sponsor = "ACM, Special Interest Group on Operating Systems, New
York, NY, USA",
subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
Management, Distributed file systems. {\bf D.4.8}
Software, OPERATING SYSTEMS, Performance,
Measurements.",
}
@Article{Haskin:1988:RMQ,
author = "Roger Haskin and Yoni Malachi and Wayne Sawdon and
Gregory Chan",
title = "Recovery Management in {QuickSilver}",
journal = j-TOCS,
volume = "6",
number = "1",
pages = "82--108",
month = feb,
year = "1988",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p82-haskin/",
abstract = "This paper describes QuickSilver, which uses atomic
transactions as a unified failure recovery mechanism
for a client-server structured distributed system.
Transactions allow failure atomicity for related
activities at a single server or at a number of
independent servers. Rather than bundling transaction
management into a dedicated language or recoverable
object manager, QuickSilver exposes the basic commit
protocol and log recovery primitives, allowing clients
and servers to tailor their recovery techniques to
their specific needs. Servers can implement their own
log recovery protocols rather than being required to
use a system-defined protocol. These decisions allow
servers to make their own choices to balance
simplicity, efficiency, and recoverability.",
acknowledgement = ack-nhfb,
affiliationaddress = "IBM, Almaden Research Cent, San Jose, CA, USA",
classification = "723",
conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems
Principles.",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "atomic transactions; computer operating systems;
computer systems, digital --- Distributed; design;
experimentation; failure atomicity; performance;
QuickSilver; recovery management; reliability",
sponsor = "ACM, Special Interest Group on Operating Systems, New
York, NY, USA",
subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
Management, Distributed file systems. {\bf D.4.3}
Software, OPERATING SYSTEMS, File Systems Management,
File organization. {\bf D.4.3} Software, OPERATING
SYSTEMS, File Systems Management, Maintenance**. {\bf
D.4.5} Software, OPERATING SYSTEMS, Reliability,
Fault-tolerance. {\bf H.2.4} Information Systems,
DATABASE MANAGEMENT, Systems, QuickSilver. {\bf D.4.5}
Software, OPERATING SYSTEMS, Reliability,
Checkpoint/restart. {\bf H.2.4} Information Systems,
DATABASE MANAGEMENT, Systems, Distributed databases.
{\bf H.2.4} Information Systems, DATABASE MANAGEMENT,
Systems, Transaction processing. {\bf H.2.2}
Information Systems, DATABASE MANAGEMENT, Physical
Design, Recovery and restart.",
}
@Article{Jul:1988:FGM,
author = "Eric Jul and Henry Levy and Norman Hutchinson and
Andrew Black",
title = "Fine-Grained Mobility in the {Emerald} System",
journal = j-TOCS,
volume = "6",
number = "1",
pages = "109--133",
month = feb,
year = "1988",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p109-jul/",
abstract = "Emerald is an object-based language and system
designed for the construction of distributed programs.
An explicit goal of Emerald is support for object
mobility; objects in Emerald can freely move within the
system to take advantage of distribution and
dynamically changing environments. We say that Emerald
has fine-grained mobility because Emerald objects can
be small data objects as well as process objects.
Fine-grained mobility allows us to apply mobility in
new ways but presents implementation problems as well.
This paper discusses the benefits of fine-grained
mobility, the Emerald language and run-time mechanisms
that support mobility, and techniques for implementing
mobility that do not degrade the performance of local
operations. Performance measurements of the current
implementation are included.",
acknowledgement = ack-nhfb,
affiliationaddress = "Univ of Washington, Seattle, WA, USA",
classification = "723",
conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems
Principles.",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "computer operating systems; computer programming
languages; computer systems, digital --- Distributed;
design; distributed languages; emerald; languages;
measurement; object-oriented languages; performance;
process mobility",
sponsor = "ACM, Special Interest Group on Operating Systems, New
York, NY, USA",
subject = "{\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
Distributed applications. {\bf D.2.6} Software,
SOFTWARE ENGINEERING, Programming Environments. {\bf
D.3.3} Software, PROGRAMMING LANGUAGES, Language
Constructs and Features, Abstract data types. {\bf
D.3.3} Software, PROGRAMMING LANGUAGES, Language
Constructs and Features, Control structures. {\bf
D.4.7} Software, OPERATING SYSTEMS, Organization and
Design, Distributed systems. {\bf D.3.2} Software,
PROGRAMMING LANGUAGES, Language Classifications,
Emerald.",
}
@Article{Nelson:1988:CSN,
author = "Michael N. Nelson and Brent B. Welch and John K.
Ousterhout",
title = "Caching in the {Sprite} Network File System",
journal = j-TOCS,
volume = "6",
number = "1",
pages = "134--154",
month = feb,
year = "1988",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-1/p134-nelson/",
abstract = "The Sprite network operating system uses large
main-memory disk block caches to achieve high
performance in its file system. It provides
non-write-through file caching on both client and
server machines. A simple cache consistency mechanism
permits files to be shared by multiple clients without
danger of stale data. In order to allow the file cache
to occupy as much memory as possible, the file system
of each machine negotiates with the virtual memory
system over physical memory usage and changes the size
of the file cache dynamically. Benchmark programs
indicate that client caches allow diskless Sprite
workstations to perform within 0--12 percent of
workstations with disks. In addition, client caching
reduces server loading by 50 percent and network
traffic by 90 percent.",
acknowledgement = ack-nhfb,
affiliationaddress = "Univ of California at Berkeley, Berkeley, CA,
USA",
classification = "723",
conference = "1987 ACM\slash SIGOPS Symposium on Operating Systems
Principles.",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "cache consistency; computer operating systems;
computer systems, digital --- Distributed; design;
distributed file caching; distributed file systems;
measurement; performance; sprite network",
sponsor = "ACM, Special Interest Group on Operating Systems, New
York, NY, USA",
subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
Management. {\bf D.4.3} Software, OPERATING SYSTEMS,
File Systems Management, Distributed file systems. {\bf
D.4.7} Software, OPERATING SYSTEMS, Organization and
Design, Distributed systems. {\bf D.4.8} Software,
OPERATING SYSTEMS, Performance, Measurements. {\bf
D.4.2} Software, OPERATING SYSTEMS, Storage Management,
Distributed memories. {\bf D.4.2} Software, OPERATING
SYSTEMS, Storage Management, Main memory. {\bf D.4.2}
Software, OPERATING SYSTEMS, Storage Management,
Secondary storage. {\bf D.4.2} Software, OPERATING
SYSTEMS, Storage Management, Virtual memory.",
}
@Article{Snodgrass:1988:RAM,
author = "Richard Snodgrass",
title = "A Relational Approach to Monitoring Complex Systems",
journal = j-TOCS,
volume = "6",
number = "2",
pages = "157--196",
month = may,
year = "1988",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-2/p157-snodgrass/",
abstract = "Traditional monitoring techniques are inadequate when
monitoring complex systems such as multiprocessors or
distributed systems. A new approach is described in
which a historical database forms the conceptual basis
for the information processed by the monitor. This
approach permits advances in specifying the low-level
data collection, specifying the analysis of the
collected data, performing the analysis, and displaying
the results. Two prototype implementations demonstrate
the feasibility of the approach.",
acknowledgement = ack-nhfb,
affiliationaddress = "Univ of North Carolina, NC, USA",
classification = "722; 723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "computer systems, digital; data processing --- Data
Reduction and Analysis; database systems ---
Relational; design; distributed systems;
experimentation; languages; low-level data collection;
measurement; Monitoring; multiprocessors; performance",
subject = "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance,
Monitors. {\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
Distributed applications. {\bf D.2.6} Software,
SOFTWARE ENGINEERING, Programming Environments. {\bf
D.4.8} Software, OPERATING SYSTEMS, Performance,
Measurements. {\bf D.2.5} Software, SOFTWARE
ENGINEERING, Testing and Debugging. {\bf H.2.3}
Information Systems, DATABASE MANAGEMENT, Languages,
Query languages. {\bf H.2.3} Information Systems,
DATABASE MANAGEMENT, Languages, QUEL. {\bf H.2.1}
Information Systems, DATABASE MANAGEMENT, Logical
Design, Data models.",
}
@Article{Sandhu:1988:NTD,
author = "Ravinderpal S. Sandhu",
title = "The {NTree}: A Two Dimension Partial Order for
Protection Groups",
journal = j-TOCS,
volume = "6",
number = "2",
pages = "197--222",
month = may,
year = "1988",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-2/p197-sandhu/",
abstract = "The benefits of providing access control with groups
of users rather than with individuals as the unit of
granularity are enhanced if the groups are organized in
a subgroup partial order. A class of such partial
orders, called ntrees, is defined by using a forest of
rooted trees or inverted rooted trees as basic partial
orders and combining these by refinement. Refinement
explodes an existing group into a partially ordered
ntree of new groups while maintaining the same
relationship between each new group and the nonexploded
groups that the exploded group had. Examples are
discussed to show the practical significance of ntrees
and the refinement operation.",
acknowledgement = ack-nhfb,
affiliationaddress = "Ohio State Univ, OH, USA",
classification = "722; 723; 921",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "access control; computer systems, digital; data
processing --- Security of Data; design; management;
mathematical techniques --- Trees; ntree; protection
groups; security; theory; two-dimensional partial
order",
subject = "{\bf H.2.0} Information Systems, DATABASE MANAGEMENT,
General, Security, integrity, and protection**. {\bf
D.4.6} Software, OPERATING SYSTEMS, Security and
Protection. {\bf K.6.m} Computing Milieux, MANAGEMENT
OF COMPUTING AND INFORMATION SYSTEMS, Miscellaneous,
Security*. {\bf H.3.3} Information Systems, INFORMATION
STORAGE AND RETRIEVAL, Information Search and
Retrieval, Search process. {\bf I.2.8} Computing
Methodologies, ARTIFICIAL INTELLIGENCE, Problem
Solving, Control Methods, and Search, Graph and tree
search strategies.",
}
@Article{Gross:1988:MEM,
author = "Thomas R. Gross and John L. Hennessy and Steven A.
Przybylski and Christopher Rowen",
title = "Measurement and Evaluation of the {MIPS} Architecture
and Processor",
journal = j-TOCS,
volume = "6",
number = "3",
pages = "229--257",
month = aug,
year = "1988",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-3/p229-gross/",
abstract = "MIPS is a 32-bit processor architecture that has been
implemented as an nMOS VLSI chip. The instruction set
architecture is RISC-based. Close coupling with
compilers and efficient use of the instruction set by
compiled programs were goals of the architecture. The
MIPS architecture requires that the software implement
some constraints in the design that are normally
considered part of the hardware implementation. This
paper presents experimental results on the
effectiveness of this processor as a program host.
Using sets of large and small benchmarks, the
instruction and operand usage patterns are examined
both for optimized and unoptimized code.",
acknowledgement = ack-nhfb,
affiliationaddress = "Stanford Univ, USA",
classification = "722; 723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "32-bit microprocessor; benchmarks; computer
architecture --- Performance; computers, microcomputer;
design; Evaluation; experimentation; measurement; MIPS;
performance",
subject = "{\bf C.1.1} Computer Systems Organization, PROCESSOR
ARCHITECTURES, Single Data Stream Architectures,
Pipeline processors**. {\bf C.0} Computer Systems
Organization, GENERAL, Instruction set design. {\bf
C.4} Computer Systems Organization, PERFORMANCE OF
SYSTEMS, Design studies. {\bf C.5.4} Computer Systems
Organization, COMPUTER SYSTEM IMPLEMENTATION, VLSI
Systems.",
}
@Article{Gifford:1988:RPP,
author = {David K. Gifford and Nathan Glasser},
title = {Remote Pipes and Procedures for Efficient Distributed
Communication},
journal = j-TOCS,
volume = {6},
number = {3},
pages = {258--283},
month = aug,
year = {1988},
CODEN = {ACSYEC},
ISSN = {0734-2071 (print), 1557-7333 (electronic)},
ISSN-L = {0734-2071},
bibdate = {Thu Jan 14 06:47:30 MST 1999},
bibsource = {Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib},
URL = {http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-3/p258-gifford/},
abstract = {We describe a new communications model for distributed
systems that combines the advantages of remote
procedure call with the efficient transfer of bulk
data. Three ideas form the basis of this model. First,
remote procedures are first-class values which can be
freely exchanged among nodes, thus enabling a greater
variety of protocols to be directly implemented in a
remote procedure call framework. Second, a new type of
abstract object, called a pipe, allows bulk data and
incremental results to be efficiently transported in a
type-safe manner. Third, the relative sequencing of
pipes and procedures can be controlled by combining
them into channel groups. Calls on the members of a
channel group are guaranteed to be processed in order.
Application experience with this model, which we call
the Channel Model, is reported. Derived performance
bounds and experimental measures are presented.},
acknowledgement = ack-nhfb,
affiliationaddress = {MIT, USA},
classification = {722; 723},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J774},
journalabr = {ACM Trans Comput Syst},
keywords = {bulk data transfer; channel model; computer systems,
digital; data transmission; design; Distributed;
performance; performance bounds; remote procedure
call},
subject = {{\bf C.2.1} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Architecture
and Design, Network communications. {\bf C.2.4}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Distributed Systems, Distributed
applications. {\bf C.4} Computer Systems Organization,
PERFORMANCE OF SYSTEMS, Performance attributes.},
}
@Article{Johnson:1988:SSR,
author = {Dale M. Johnson and F. Javier Thayer},
title = {Stating Security Requirements with Tolerable Sets},
journal = j-TOCS,
volume = {6},
number = {3},
pages = {284--295},
month = aug,
year = {1988},
CODEN = {ACSYEC},
ISSN = {0734-2071 (print), 1557-7333 (electronic)},
ISSN-L = {0734-2071},
bibdate = {Thu Jan 14 06:47:30 MST 1999},
bibsource = {Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib},
URL = {http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-3/p284-johnson/},
abstract = {This paper introduces and develops the concept of
tolerable sets for analyzing general security
requirements. Tolerable sets, and corresponding purging
functions and invisibility based on the sets, are used
to state and test such requirements. Some particular
applications are described, and some critical remarks
about purging functions are included.},
acknowledgement = ack-nhfb,
affiliationaddress = {MITRE Corp, USA},
classification = {723},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J774},
journalabr = {ACM Trans Comput Syst},
keywords = {computer security requirements; computer systems,
digital; data processing; purging functions; security;
Security of Data; tolerable sets; verification},
subject = {{\bf D.4.6} Software, OPERATING SYSTEMS, Security and
Protection, Information flow controls. {\bf D.4.6}
Software, OPERATING SYSTEMS, Security and Protection,
Security kernels**. {\bf D.4.6} Software, OPERATING
SYSTEMS, Security and Protection, Verification**. {\bf
F.3.1} Theory of Computation, LOGICS AND MEANINGS OF
PROGRAMS, Specifying and Verifying and Reasoning about
Programs, Specification techniques.},
}
@Article{Colwell:1988:PEA,
author = {Robert P. Colwell and Edward F. Gehringer and E.
Douglas Jensen},
title = {Performance Effects of Architectural Complexity in the
{Intel 432}},
journal = j-TOCS,
volume = {6},
number = {3},
pages = {296--339},
month = aug,
year = {1988},
CODEN = {ACSYEC},
ISSN = {0734-2071 (print), 1557-7333 (electronic)},
ISSN-L = {0734-2071},
bibdate = {Thu Jan 14 06:47:30 MST 1999},
bibsource = {Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib},
URL = {http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-3/p296-colwell/},
abstract = {The Intel 432 is noteworthy as an architecture
incorporating a large amount of functionality that most
other systems perform by software. This paper examines
the performance impact of the incorporation of several
kinds of functionality. Among these are the addressing
structure, the caches, instruction alignment, the
buses, and the way that garbage collection is handled.
A set of several benchmarks is used to quantify the
performance effect of each of these decisions. The
results indicate that the 432 could have been speeded
up very significantly if a small number of
implementation decisions had been made differently, and
if incrementally better technology had been used in its
construction.},
acknowledgement = ack-nhfb,
affiliationaddress = {Multiflow Computer Inc},
classification = {722; 723},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J774},
journalabr = {ACM Trans Comput Syst},
keywords = {architectural complexity; computer programming;
computer systems, digital --- Parallel Processing;
design; Intel 432; measurement; object-based
programming environment; performance; Performance;
security},
subject = {{\bf C.4} Computer Systems Organization, PERFORMANCE
OF SYSTEMS, Design studies. {\bf B.5.m} Hardware,
REGISTER-TRANSFER-LEVEL IMPLEMENTATION, Miscellaneous.
{\bf C.1.1} Computer Systems Organization, PROCESSOR
ARCHITECTURES, Single Data Stream Architectures,
Single-instruction-stream, single-data-stream
processors (SISD)**. {\bf C.1.2} Computer Systems
Organization, PROCESSOR ARCHITECTURES, Multiple Data
Stream Architectures (Multiprocessors),
Multiple-instruction-stream, multiple-data-stream
processors (MIMD). {\bf C.1.2} Computer Systems
Organization, PROCESSOR ARCHITECTURES, Multiple Data
Stream Architectures (Multiprocessors), Parallel
processors**. {\bf C.1.3} Computer Systems
Organization, PROCESSOR ARCHITECTURES, Other
Architecture Styles, Capability architectures**. {\bf
C.1.3} Computer Systems Organization, PROCESSOR
ARCHITECTURES, Other Architecture Styles, High-level
language architectures**. {\bf C.1.3} Computer Systems
Organization, PROCESSOR ARCHITECTURES, Other
Architecture Styles, Stack-oriented processors**. {\bf
D.3.4} Software, PROGRAMMING LANGUAGES, Processors,
Compilers.},
}
@Article{Peterson:1988:PNS,
author = {Larry L. Peterson},
title = {The {Profile} Naming Service},
journal = j-TOCS,
volume = {6},
number = {4},
pages = {341--364},
month = nov,
year = {1988},
CODEN = {ACSYEC},
ISSN = {0734-2071 (print), 1557-7333 (electronic)},
ISSN-L = {0734-2071},
bibdate = {Thu Jan 14 06:47:30 MST 1999},
bibsource = {Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib},
URL = {http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-4/p341-peterson/},
abstract = {Profile is a descriptive naming service used to
identify users and organizations. This paper presents a
structural overview of Profile's three major
components: a confederation of attribute-based name
servers, a name space abstraction that unifies the name
servers, and a user interface that integrates the name
space with existing naming systems. Each name server is
an independent authority that allows clients to
describe users and organizations with a multiplicity of
attributes; the name space abstraction is a client
program that implements a discipline for searching a
sequence of name servers; and the interface provides a
tool with which users build customized commands.
Experience with an implementation in the DARPA\slash
NSF Internet demonstrates that Profile is a feasible
and effective mechanism for naming users and
organizations in a large internet.},
acknowledgement = ack-nhfb,
affiliation = {Univ of Arizona},
affiliationaddress = {Tucson, AZ, USA},
classification = {722; 723},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J774},
journalabr = {ACM Trans Comput Syst},
keywords = {Attribute-Based Name Servers; Computer Networks;
Computer Programming --- Algorithms; Computer Systems,
Digital; DARPA-NSF Internet; Database Systems ---
Distributed; design; Distributed; human factors; Name
Space Abstraction; Naming Service; Profile; User
Interface},
subject = {{\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
Distributed databases. {\bf H.2.4} Information Systems,
DATABASE MANAGEMENT, Systems, Distributed databases.
{\bf H.3.4} Information Systems, INFORMATION STORAGE
AND RETRIEVAL, Systems and Software, Question-answering
(fact retrieval) systems**. {\bf H.3.3} Information
Systems, INFORMATION STORAGE AND RETRIEVAL, Information
Search and Retrieval, Search process.},
}
@Article{Atkins:1988:ESD,
author = {M. Stella Atkins},
title = {Experiments in {SR} with Different Upcall Program
Structures},
journal = j-TOCS,
volume = {6},
number = {4},
pages = {365--392},
month = nov,
year = {1988},
CODEN = {ACSYEC},
ISSN = {0734-2071 (print), 1557-7333 (electronic)},
ISSN-L = {0734-2071},
bibdate = {Thu Jan 14 06:47:30 MST 1999},
bibsource = {Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib},
URL = {http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-4/p365-atkins/},
abstract = {This paper explores program designs for layered
systems such as communication protocols and
server\slash client systems that do not exhibit a
strict hierarchy in their control flow. D. D. Clark
(1985) has proposed structuring such systems, where
both upward and downward control flow are required, to
use efficient synchronous procedure calls between the
layers whenever possible. The term upcall is used by
Clark to describe this synchronous upward communication
from server to client. Several techniques are possible
for structuring such programs using upcalls.
Comparisons are made by implementing a communication
protocol described by Clark in three different ways.
The first method implements all the protocol routines
in a single large module. The second method structures
the routines into modules occupying vertical slices of
the protocol layers, and the third method structures
the routines into modules corresponding to the protocol
layers. It is concluded that the vertically layered
protocol design is to be preferred unless there are
many shared variables between the send-side and
receive-side, as it is very efficient and provides the
best protection of clients from each other. The
horizontally layered design is the least efficient, but
it is the easiest to program.},
acknowledgement = ack-nhfb,
affiliation = {Simon Fraser Univ},
affiliationaddress = {Burnaby, BC, Can},
classification = {723},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J774},
journalabr = {ACM Trans Comput Syst},
keywords = {Computer Networks --- Protocols; Computer Programming
--- Algorithms; Computer Software; design; Design;
languages; Layered Systems; performance; Server/Client
Systems; Upcall Program Structures},
subject = {{\bf D.1.3} Software, PROGRAMMING TECHNIQUES,
Concurrent Programming. {\bf D.3.3} Software,
PROGRAMMING LANGUAGES, Language Constructs and
Features, Concurrent programming structures. {\bf
D.3.2} Software, PROGRAMMING LANGUAGES, Language
Classifications, SR. {\bf C.2.2} Computer Systems
Organization, COMPUTER-COMMUNICATION NETWORKS, Network
Protocols. {\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.
{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management. {\bf D.4.4} Software, OPERATING SYSTEMS,
Communications Management, Buffering. {\bf D.4.7}
Software, OPERATING SYSTEMS, Organization and Design,
Hierarchical design**. {\bf D.4.8} Software, OPERATING
SYSTEMS, Performance, Measurements. {\bf D.4.8}
Software, OPERATING SYSTEMS, Performance, Simulation.},
}
@Article{Agarwal:1988:CPO,
author = {Anant Agarwal and John Hennessy and Mark Horowitz},
title = {Cache Performance of Operating System and
Multiprogramming Workloads},
journal = j-TOCS,
volume = {6},
number = {4},
pages = {393--431},
month = nov,
year = {1988},
CODEN = {ACSYEC},
ISSN = {0734-2071 (print), 1557-7333 (electronic)},
ISSN-L = {0734-2071},
bibdate = {Thu Jan 14 06:47:30 MST 1999},
bibsource = {Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib},
URL = {http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-4/p393-agarwal/},
abstract = {Large caches are necessary in current high-performance
computer systems to provide the required high memory
bandwidth. Because a small decrease in cache
performance can result in significant system
performance degradation, accurately characterizing the
performance of large caches is important. Although
measurements on actual systems have shown that
operating systems and multiprogramming can affect cache
performance, previous studies have not focused on these
effects. We have developed a program tracing technique
called ATUM (Address Tracing Using Microcode) that
captures realistic traces of multitasking workloads
including the operating system. Examining cache
behavior using these traces from a VAX processor shows
that both the operating system and multiprogramming
activity significantly degrade cache performance, with
an even greater proportional impact on large caches.
From a careful analysis of the causes of this
degradation, we explore various techniques to reduce
this loss. While seemingly little can be done to
mitigate the effect of system references, multitasking
cache miss activity can be substantially reduced with
small hardware additions.},
acknowledgement = ack-nhfb,
affiliation = {Stanford Univ},
affiliationaddress = {Stanford, CA, USA},
classification = {723},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J774},
journalabr = {ACM Trans Comput Syst},
keywords = {Address Tracing Using Microcode; ATUM; Cache
Performance; Computer Operating Systems; Computer
Systems Programming --- Multiprogramming; design;
measurement; Multiprogramming Workloads; performance;
Performance; Program Tracing},
subject = {{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
Styles, Cache memories. {\bf B.3.2} Hardware, MEMORY
STRUCTURES, Design Styles, Associative memories. {\bf
B.3.2} Hardware, MEMORY STRUCTURES, Design Styles,
Virtual memory. {\bf B.3.3} Hardware, MEMORY
STRUCTURES, Performance Analysis and Design Aids**,
Formal models**. {\bf B.3.3} Hardware, MEMORY
STRUCTURES, Performance Analysis and Design Aids**,
Simulation**. {\bf C.4} Computer Systems Organization,
PERFORMANCE OF SYSTEMS, Design studies. {\bf C.4}
Computer Systems Organization, PERFORMANCE OF SYSTEMS,
Measurement techniques. {\bf C.4} Computer Systems
Organization, PERFORMANCE OF SYSTEMS, Modeling
techniques. {\bf D.4.1} Software, OPERATING SYSTEMS,
Process Management,
Multiprocessing/multiprogramming/multitasking. {\bf
D.4.8} Software, OPERATING SYSTEMS, Performance,
Measurements.},
}
@Article{Okamoto:1988:DMS,
author = {Tatsuaki Okamoto},
title = {A Digital Multisignature Scheme using Bijective
Public-Key Cryptosystems},
journal = j-TOCS,
volume = {6},
number = {4},
pages = {432--441},
month = nov,
year = {1988},
CODEN = {ACSYEC},
ISSN = {0734-2071 (print), 1557-7333 (electronic)},
ISSN-L = {0734-2071},
bibdate = {Thu Jan 14 06:47:30 MST 1999},
bibsource = {Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib},
URL = {http://www.acm.org:80/pubs/citations/journals/tocs/1988-6-4/p432-okamoto/},
abstract = {A new digital multisignature scheme using bijective
public-key cryptosystems that overcomes the problems of
previous signature schemes used for multisignatures is
proposed. The principal features of this scheme are (1)
the length of a multisignature message is nearly
equivalent to that for a single signature message; (2)
by using a one-way hash function, multisignature
generation and verification are processed in an
efficient manner; (3) the order of signing is not
restricted; and (4) this scheme can be constructed on
any bijective public-key cryptosystem as well as the
RSA scheme. In addition, it is shown that the new
scheme is considered as safe as the public-key
cryptosystem used in this new scheme. Some variations
based on the scheme are also presented.},
acknowledgement = ack-nhfb,
affiliation = {NTT},
affiliationaddress = {Yokosuka, Jpn},
classification = {723},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J774},
journalabr = {ACM Trans Comput Syst},
keywords = {Bijective Public-Key Cryptosystems; Computer-Based
Message Systems; Cryptography; Digital Multisignature
Scheme; Electronic Mail; One-Way Hash Function;
security},
subject = {{\bf E.3} Data, DATA ENCRYPTION, Public key
cryptosystems.},
}
@Article{Borg:1989:FTU,
author = {Anita Borg and Wolfgang Blau and Wolfgang Graetsch and
Ferdinand Herrmann and Wolfgang Oberle},
title = {Fault Tolerance under {UNIX}},
journal = j-TOCS,
volume = {7},
number = {1},
pages = {1--24},
month = feb,
year = {1989},
CODEN = {ACSYEC},
ISSN = {0734-2071 (print), 1557-7333 (electronic)},
ISSN-L = {0734-2071},
bibdate = {Thu Jan 14 06:47:30 MST 1999},
bibsource = {Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib},
URL = {http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-1/p1-borg/},
abstract = {The initial design for a distributed, fault-tolerant
version of UNIX based on three-way atomic message
transmission was presented in an earlier paper. This
paper describes the working system, now known as the
TARGON\slash 32. The original design left open
questions in at least two areas: fault tolerance for
server processes and recovery after a crash were
briefly and inaccurately sketched; rebackup after
recovery was not discussed at all. The fundamental
design involving three-way message transmission has
remained unchanged. However, server backup has been
redesigned and is now more consistent with that of
normal user processes. Recovery and rebackup have been
completed in a less centralized and thus more efficient
manner. We review important aspects of the original
design and note how the implementation differs from our
original ideas. We then focus on the backup and
recovery for server processes and the changes and
additions in the design and implementation of recovery
and rebackup.},
acknowledgement = ack-nhfb,
affiliation = {Nixdorf Computer GmbH},
affiliationaddress = {Paderborn, West Ger},
classification = {722; 723},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J774},
journalabr = {ACM Trans Comput Syst},
keywords = {algorithms; Computer Architecture; Computer Operating
Systems; Computer Systems, Digital; Crash Handling;
Fault Tolerant Capability; Multiway Message
Transmission; reliability; Roll Forward Recovery;
Server Architecture; TARGON/32; UNIX},
subject = {{\bf D.4.0} Software, OPERATING SYSTEMS, General,
UNIX. {\bf D.4.5} Software, OPERATING SYSTEMS,
Reliability, Fault-tolerance. {\bf D.4.5} Software,
OPERATING SYSTEMS, Reliability, Backup procedures. {\bf
D.4.5} Software, OPERATING SYSTEMS, Reliability,
Checkpoint/restart. {\bf C.1.2} Computer Systems
Organization, PROCESSOR ARCHITECTURES, Multiple Data
Stream Architectures (Multiprocessors), Associative
processors. {\bf D.4.3} Software, OPERATING SYSTEMS,
File Systems Management. {\bf D.4.4} Software,
OPERATING SYSTEMS, Communications Management, Message
sending.},
}
@Article{Pittelli:1989:RST,
author = {Frank M. Pittelli and H{\'e}ctor Garc{\'\i}a-Molina},
title = {Reliable Scheduling in a {TMR} Database System},
journal = j-TOCS,
volume = {7},
number = {1},
pages = {25--60},
month = feb,
year = {1989},
CODEN = {ACSYEC},
ISSN = {0734-2071 (print), 1557-7333 (electronic)},
ISSN-L = {0734-2071},
bibdate = {Thu Jan 14 06:47:30 MST 1999},
bibsource = {Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib},
URL = {http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-1/p25-pittelli/},
abstract = {A Triple Modular Redundant (TMR) system achieves high
reliability by replicating data and all processing at
three independent nodes. When TMR is used for database
processing all nonfaulty computers must execute the
same sequence of transactions, and this is ensured by a
collection of processes known as schedulers. In this
paper we study the implementation of efficient
schedulers through analysis of various enhancements
such as null transactions and message batching. The
schedulers have been implemented in an experimental TMR
system and the evaluation results are presented here.},
acknowledgement = ack-nhfb,
affiliation = {US Naval Acad},
affiliationaddress = {USA},
classification = {723; 913},
journal-URL = {http://portal.acm.org/browse_dl.cfm?idx=J774},
journalabr = {ACM Trans Comput Syst},
keywords = {algorithms; Database Systems; design; Distributed;
Message Batching; Null Transactions; performance;
reliability; Reliability; Reliable Scheduling;
Scheduling; Transaction Processing; Triple Modular
Redundancy},
subject = {{\bf C.4} Computer Systems Organization, PERFORMANCE
OF SYSTEMS, Reliability, availability, and
serviceability. {\bf H.2.0} Information Systems,
DATABASE MANAGEMENT, General. {\bf D.4.1} Software,
OPERATING SYSTEMS, Process Management, Scheduling.},
}
@Article{Raymond:1989:TBA,
author = "Kerry Raymond",
title = "A Tree-Based Algorithm for Distributed Mutual
Exclusion",
journal = j-TOCS,
volume = "7",
number = "1",
pages = "61--77",
month = feb,
year = "1989",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-1/p61-raymond/",
abstract = "We present an algorithm for distributed mutual
exclusion in a computer network of $N$ nodes that
communicate by messages rather than shared memory. The
algorithm uses a spanning tree of the computer network,
and the number of messages exchanged per critical
section depends on the topology of this tree. However,
typically the number of messages exchanged is
$O(\log N)$ under light demand, and reduces to
approximately four messages under saturated demand.
Each node holds information only about its immediate
neighbors in the spanning tree rather than information
about all nodes, and failed nodes can recover necessary
information from their neighbors. The algorithm does
not require sequence numbers as it operates correctly
despite message overtaking.",
acknowledgement = ack-nhfb,
affiliation = "Univ of Queensland",
affiliationaddress = "St. Lucia, Aust",
classification = "722; 723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "algorithms; Computer Networks; Computer Programming
--- Algorithms; Computer Systems, Digital; design;
Distributed; Distributed Mutual Exclusion; Mathematical
Techniques --- Trees; Message Passing; Tree Based
Algorithms",
subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management, Mutual exclusion. {\bf D.4.1} Software,
OPERATING SYSTEMS, Process Management, Synchronization.
{\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.
{\bf D.4.4} Software, OPERATING SYSTEMS, Communications
Management, Message sending.",
}
@Article{Thompson:1989:ESA,
author = "James G. Thompson and Alan Jay Smith",
title = "Efficient (Stack) Algorithms for Analysis of
Write-Back and Sector Memories",
journal = j-TOCS,
volume = "7",
number = "1",
pages = "78--117",
month = feb,
year = "1989",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-1/p78-thompson/",
abstract = "For the class of replacement algorithms known as stack
algorithms, existing analysis techniques permit the
computation of memory miss ratios for all memory sizes
simultaneously in one pass over a memory reference
string. We extend the class of computations possible by
this methodology in two ways. First, we show how to
compute the effects of copy-backs in write-back caches.
The key observation here is that a given block is clean
for all memory sizes less than or equal to $C$ blocks
and is dirty for all larger memory sizes. Our technique
permits efficient computations for algorithms or
systems using periodic write-back and\slash or block
deletion. The second extension permits stack analysis
simulation for sector (or subblock) caches in which a
sector (associated with an address tag) consists of
subsectors (or subblocks) that can be loaded
independently. The key observation here is that a
subsector is present only in caches of size $C$ or
greater. Load forward prefetching in a sector cache is
shown to be a stack algorithm and is easily simulated
using our technique. Running times for our methods are
only slightly higher than for a simulation of a single
memory size using nonstack techniques.",
acknowledgement = ack-nhfb,
affiliation = "US Air Force",
affiliationaddress = "USA",
classification = "723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "algorithms; Cache Memories; Computer Programming ---
Algorithms; Data Storage, Digital; design;
experimentation; measurement; Memory System
Performance; performance; Performance; Replacement
Algorithms; Sector Memories; Stack Algorithms; theory;
Write Back Memories",
subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
Styles. {\bf B.6.1} Hardware, LOGIC DESIGN, Design
Styles, Memory control and access**. {\bf B.3.3}
Hardware, MEMORY STRUCTURES, Performance Analysis and
Design Aids**, Simulation**. {\bf D.4.8} Software,
OPERATING SYSTEMS, Performance.",
}
@Article{Gupta:1989:HSI,
author = "Anoop Gupta and Charles Forgy and Allen Newell",
title = "High-speed Implementations of Rule-Based Systems",
journal = j-TOCS,
volume = "7",
number = "2",
pages = "119--146",
month = may,
year = "1989",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-2/p119-gupta/",
abstract = "We explore various methods for speeding up the
execution of rule-based systems. In particular, we
examine the role of parallelism in the high-speed
execution of rule-based systems and study the
architectural issues in the design of computers for
rule-based systems. Our results show that contrary to
initial expectations, the speedup that can be obtained
from parallelism is quite limited, only about tenfold.
The reasons for the small speed-up are: (1) the small
number of rules relevant to each change to data memory;
(2) the large variation in the processing requirements
of relevant rules; and (3) the small number of changes
made to data memory between synchronization steps.
Furthermore, we observe that to obtain this limited
factor of tenfold speed-up, it is necessary to exploit
parallelism at a very fine granularity. We propose that
a suitable architecture to exploit such fine-grain
parallelism is a shared-memory multiprocessor with
32--64 processors.",
acknowledgement = ack-nhfb,
affiliation = "Stanford Univ",
affiliationaddress = "Stanford, CA, USA",
classification = "723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "algorithms; Artificial Intelligence; Computer
Architecture; Computer Systems, Digital --- Parallel
Processing; design; languages; performance; Production
Systems; Rule Based Systems; Shared Memory
Multiprocessors; Speedup",
subject = "{\bf I.2.5} Computing Methodologies, ARTIFICIAL
INTELLIGENCE, Programming Languages and Software. {\bf
I.2.5} Computing Methodologies, ARTIFICIAL
INTELLIGENCE, Programming Languages and Software, OPS5.
{\bf D.1.3} Software, PROGRAMMING TECHNIQUES,
Concurrent Programming. {\bf C.1.2} Computer Systems
Organization, PROCESSOR ARCHITECTURES, Multiple Data
Stream Architectures (Multiprocessors),
Multiple-instruction-stream, multiple-data-stream
processors (MIMD). {\bf G.1.0} Mathematics of
Computing, NUMERICAL ANALYSIS, General, Parallel
algorithms.",
}
@Article{Cheriton:1989:DGN,
author = "David R. Cheriton and Timothy P. Mann",
title = "Decentralizing a Global Naming Service for Improved
Performance and Fault Tolerance",
journal = j-TOCS,
volume = "7",
number = "2",
pages = "147--183",
month = may,
year = "1989",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-2/p147-cheriton/",
abstract = "We address the problem of a global naming system,
proposing a three-level naming architecture that
consists of global, administrational, and managerial
naming mechanisms, each optimized to meet the
performance, reliability, and security requirements at
its own level. We focus in particular on a
decentralized approach to the lower levels, in which
naming is handled directly by the managers of the named
objects. Client-name caching and multicast are
exploited to implement name mapping with almost optimum
performance and fault tolerance. We also show how the
naming system can be made secure. Our conclusions are
bolstered by experience with an implementation in the V
distributed operating system.",
acknowledgement = ack-nhfb,
affiliation = "Stanford Univ",
affiliationaddress = "Stanford, CA, USA",
classification = "722; 723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "Computer Fault Tolerance; Computer Operating Systems;
Computer Systems, Digital; design; Distributed;
Distributed File Systems; experimentation; Global
Naming Service; measurement; performance; reliability",
subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
Management, Distributed file systems. {\bf C.2.4}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Distributed Systems. {\bf D.4.6} Software,
OPERATING SYSTEMS, Security and Protection.",
}
@Article{Agarwal:1989:ACM,
author = "Anant Agarwal and Mark Horowitz and John Hennessy",
title = "An Analytical Cache Model",
journal = j-TOCS,
volume = "7",
number = "2",
pages = "184--215",
month = may,
year = "1989",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-2/p184-agarwal/",
abstract = "Trace-driven simulation and hardware measurement are
the techniques most often used to obtain accurate
performance figures for caches. The former requires a
large amount of simulation time to evaluate each cache
configuration while the latter is restricted to
measurements of existing caches. An analytical cache
model that uses parameters extracted from address
traces of programs can efficiently provide estimates of
cache performance and show the effects of varying cache
parameters. By representing the factors that affect
cache performance, we develop an analytical model that
gives miss rates for a given trace as a function of
cache size, degree of associativity, block size,
subblock size, multiprogramming level, task switch
interval, and observation interval. The predicted
values closely approximate the results of trace-driven
simulations, while requiring only a small fraction of
the computation cost.",
acknowledgement = ack-nhfb,
affiliation = "Stanford Univ",
affiliationaddress = "Stanford, CA, USA",
classification = "722; 723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "Cache Miss Rate; Cache Models; Computer Architecture;
Data Storage Units; design; measurement; Memory
Structures; performance; theory; Trace Driven
Simulation",
subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
Styles, Cache memories. {\bf B.3.3} Hardware, MEMORY
STRUCTURES, Performance Analysis and Design Aids**,
Formal models**. {\bf B.3.3} Hardware, MEMORY
STRUCTURES, Performance Analysis and Design Aids**,
Simulation**. {\bf D.4.1} Software, OPERATING SYSTEMS,
Process Management,
Multiprocessing/multiprogramming/multitasking.",
}
@Article{Peterson:1989:PUC,
author = "Larry L. Peterson and Nick C. Buchholz and Richard D.
Schlichting",
title = "Preserving and Using Context Information in
Interprocess Communication",
journal = j-TOCS,
volume = "7",
number = "3",
pages = "217--246",
month = aug,
year = "1989",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-3/p217-peterson/",
abstract = "When processes in a network communicate, the messages
they exchange define a partial ordering of externally
visible events. While the significance of this partial
order in distributed computing is well understood, it
has not been made an explicit part of the communication
substrate upon which distributed programs are
implemented. This paper describes a new interprocess
communication mechanism, called Psync, that explicitly
encodes this partial ordering with each message. The
paper shows how Psync can be efficiently implemented on
an unreliable communications network, and it
demonstrates how conversations serve as an elegant
foundation for ordering messages exchanged in a
distributed computation and for recovering from
processor failures.",
acknowledgement = ack-nhfb,
affiliation = "Univ of Arizona",
affiliationaddress = "Tucson, AZ, USA",
classification = "722; 723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "Computer Fault Tolerance; Computer
Programming--Algorithms; Computer Systems, Digital;
Context Information; Database Systems--Distributed;
design; Distributed; Interprocess Communication;
Partial Ordering; performance; Psync Protocol;
reliability",
subject = "{\bf C.2.2} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
Psync. {\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.
{\bf D.4.4} Software, OPERATING SYSTEMS, Communications
Management, Network communication. {\bf C.4} Computer
Systems Organization, PERFORMANCE OF SYSTEMS, Design
studies.",
}
@Article{Satyanarayanan:1989:ISL,
author = "M. Satyanarayanan",
title = "Integrating Security in a Large Distributed System",
journal = j-TOCS,
volume = "7",
number = "3",
pages = "247--280",
month = aug,
year = "1989",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-3/p247-satyanarayanan/",
abstract = "Andrew is a distributed computing environment that is
a synthesis of the personal computing and timesharing
paradigms. When mature, it is expected to encompass
over 5,000 workstations spanning the Carnegie Mellon
University campus. This paper examines the security
issues that arise in such an environment and describes
the mechanisms that have been developed to address
them. These mechanisms include the logical and physical
separation of servers and clients, support for secure
communication at the remote procedure call level, a
distributed authentication service, a file-protection
scheme that combines access lists with UNIX mode bits,
and the use of encryption as a basic building block.
The paper also discusses the assumptions underlying
security in Andrew and analyzes the vulnerability of
the system. Usage experience reveals that resource
control, particularly of workstation CPU cycles, is
more important than originally anticipated and that the
mechanisms available to address this issue are
rudimentary.",
acknowledgement = ack-nhfb,
affiliation = "Carnegie Mellon Univ",
affiliationaddress = "Pittsburgh, PA, USA",
classification = "722; 723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "algorithms; Andrew Distributed Computing Environment;
Computer Security; Computer Systems, Digital;
Computers, Personal; Cryptography; design; Distributed;
security; Time Sharing",
subject = "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and
Protection. {\bf C.0} Computer Systems Organization,
GENERAL, Andrew. {\bf D.4.3} Software, OPERATING
SYSTEMS, File Systems Management, Distributed file
systems. {\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.
{\bf E.3} Data, DATA ENCRYPTION, Data encryption
standard (DES)**.",
}
@Article{Shankar:1989:VDT,
author = "A. Udaya Shankar",
title = "Verified Data Transfer Protocols with Variable Flow
Control",
journal = j-TOCS,
volume = "7",
number = "3",
pages = "281--316",
month = aug,
year = "1989",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Jan 14 06:47:30 MST 1999",
bibsource = "Compendex database;
http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-3/p281-shankar/",
abstract = "We present and verify a sliding window protocol which
uses modulo-N sequence numbers to achieve reliable
flow-controlled data transfer between a producer and a
consumer connected by unreliable channels. The
consumer's data needs are represented by a receive
window whose size can vary with time. The producer
entity sends segments of data words that lie within the
consumer's receive window. The consumer entity sends
acknowledgement, selective acknowledgement, and
selective reject messages that inform the producer
entity of the current receive window size, the data
word next expected, and the reception (or lack of
reception) of out-of-sequence data segments. Our
protocol is, therefore, a proper extension of existing
transport and data link protocol standards such as TCP,
ISO TP, HDLC, ADCCP, and so forth.",
acknowledgement = ack-nhfb,
affiliation = "Univ of Maryland",
affiliationaddress = "College Park, MD, USA",
classification = "723",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
journalabr = "ACM Trans Comput Syst",
keywords = "Computer Networks--Protocols; Data Transfer Protocols;
Data Transmission; design; Reliability; Sliding Window
Protocol; theory; Variable Flow Control; verification",
subject = "{\bf C.2.2} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
Protocol verification. {\bf C.3} Computer Systems
Organization, SPECIAL-PURPOSE AND APPLICATION-BASED
SYSTEMS, Real-time and embedded systems. {\bf D.4.8}
Software, OPERATING SYSTEMS, Performance, Modeling and
prediction. {\bf F.3.1} Theory of Computation, LOGICS
AND MEANINGS OF PROGRAMS, Specifying and Verifying and
Reasoning about Programs. {\bf D.4.4} Software,
OPERATING SYSTEMS, Communications Management.",
}
@Article{Li:1989:MCS,
author = "Kai Li and Paul Hudak",
title = "Memory Coherence in Shared Virtual Memory Systems",
journal = j-TOCS,
volume = "7",
number = "4",
pages = "321--359",
month = nov,
year = "1989",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-4/p321-li/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; experimentation; measurement;
performance",
subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
Styles, Shared memory. {\bf C.1.2} Computer Systems
Organization, PROCESSOR ARCHITECTURES, Multiple Data
Stream Architectures (Multiprocessors), Interconnection
architectures. {\bf B.3.2} Hardware, MEMORY STRUCTURES,
Design Styles, Virtual memory. {\bf C.2.4} Computer
Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems, Distributed applications.",
}
@Article{Ng:1989:UHI,
author = "Tony P. Ng",
title = "Using Histories to Implement Atomic Objects",
journal = j-TOCS,
volume = "7",
number = "4",
pages = "360--393",
month = nov,
year = "1989",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-4/p360-ng/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design",
subject = "{\bf H.2.4} Information Systems, DATABASE MANAGEMENT,
Systems, Concurrency. {\bf H.2.4} Information Systems,
DATABASE MANAGEMENT, Systems, Distributed databases.
{\bf C.2.2} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Protocols.
{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management, Synchronization. {\bf D.4.1} Software,
OPERATING SYSTEMS, Process Management, Concurrency.
{\bf D.4.7} Software, OPERATING SYSTEMS, Organization
and Design, Distributed systems.",
}
@Article{Barbara:1989:IAU,
author = "Daniel Barbar{\'a} and H{\'e}ctor Garc{\'\i}a-Molina and
Annemarie Spauster",
title = "Increasing Availability under Mutual Exclusion
Constraints with Dynamic Vote Reassignment",
journal = j-TOCS,
volume = "7",
number = "4",
pages = "394--426",
month = nov,
year = "1989",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1989-7-4/p394-barbara/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; performance; reliability",
subject = "{\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
Network operating systems. {\bf D.4.1} Software,
OPERATING SYSTEMS, Process Management, Mutual
exclusion. {\bf D.4.5} Software, OPERATING SYSTEMS,
Reliability.",
}
@Article{Schroeder:1990:PFR,
author = "Michael D. Schroeder and Michael Burrows",
title = "Performance of the {Firefly RPC}",
journal = j-TOCS,
volume = "8",
number = "1",
pages = "1--17",
month = feb,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-1/p1-schroeder/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "measurement; performance",
subject = "{\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
Network operating systems. {\bf C.1.2} Computer Systems
Organization, PROCESSOR ARCHITECTURES, Multiple Data
Stream Architectures (Multiprocessors). {\bf C.4}
Computer Systems Organization, PERFORMANCE OF SYSTEMS,
Measurement techniques. {\bf D.4.8} Software, OPERATING
SYSTEMS, Performance, Measurements.",
}
@Article{Burrows:1990:LA,
author = "Michael Burrows and Mart{\'\i}n Abadi and Roger Needham",
title = "A Logic of Authentication",
journal = j-TOCS,
volume = "8",
number = "1",
pages = "18--36",
month = feb,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-1/p18-burrows/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "security; theory; verification",
subject = "{\bf C.2.2} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
Protocol verification. {\bf C.2.0} Computer Systems
Organization, COMPUTER-COMMUNICATION NETWORKS, General,
Security and protection (e.g., firewalls). {\bf D.4.6}
Software, OPERATING SYSTEMS, Security and Protection,
Authentication. {\bf F.3.1} Theory of Computation,
LOGICS AND MEANINGS OF PROGRAMS, Specifying and
Verifying and Reasoning about Programs. {\bf D.4.6}
Software, OPERATING SYSTEMS, Security and Protection,
Cryptographic controls.",
}
@Article{Bershad:1990:LRP,
author = "Brian N. Bershad and Thomas E. Anderson and Edward D.
Lazowska and Henry M. Levy",
title = "Lightweight Remote Procedure Call",
journal = j-TOCS,
volume = "8",
number = "1",
pages = "37--55",
month = feb,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-1/p37-bershad/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; measurement; performance; security",
subject = "{\bf D.4.4} Software, OPERATING SYSTEMS,
Communications Management. {\bf D.4.7} Software,
OPERATING SYSTEMS, Organization and Design, Distributed
systems. {\bf C.1.3} Computer Systems Organization,
PROCESSOR ARCHITECTURES, Other Architecture Styles,
Capability architectures**. {\bf D.4.6} Software,
OPERATING SYSTEMS, Security and Protection, Security
kernels**. {\bf D.4.8} Software, OPERATING SYSTEMS,
Performance, Measurements.",
}
@Article{Anderson:1990:SCM,
author = "David P. Anderson and Ron Kuivila",
title = "A System for Computer Music Performance",
journal = j-TOCS,
volume = "8",
number = "1",
pages = "56--82",
month = feb,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-1/p56-anderson/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; experimentation; human factors;
languages; performance",
subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management, Scheduling. {\bf D.4.7} Software, OPERATING
SYSTEMS, Organization and Design, Interactive systems.
{\bf D.4.7} Software, OPERATING SYSTEMS, Organization
and Design, Real-time systems and embedded systems.
{\bf D.4.4} Software, OPERATING SYSTEMS, Communications
Management, Input/output.",
}
@Article{Deering:1990:MRD,
author = "Stephen E. Deering and David R. Cheriton",
title = "Multicast Routing in Datagram Internetworks and
Extended {LANs}",
journal = j-TOCS,
volume = "8",
number = "2",
pages = "85--110",
month = may,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-2/p85-deering/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; performance",
subject = "{\bf C.2.1} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Architecture
and Design, Network communications. {\bf C.2.5}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Local and Wide-Area Networks. {\bf C.2.2}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Network Protocols, Protocol architecture.
{\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.",
}
@Article{Schwan:1990:TDO,
author = "Karsten Schwan and Win Bo",
title = "``Topologies'' --- Distributed Objects on
Multicomputers",
journal = j-TOCS,
volume = "8",
number = "2",
pages = "111--157",
month = may,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-2/p111-schwan/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; measurement; performance",
subject = "{\bf D.1.3} Software, PROGRAMMING TECHNIQUES,
Concurrent Programming, Parallel programming. {\bf
G.1.0} Mathematics of Computing, NUMERICAL ANALYSIS,
General, Parallel algorithms. {\bf C.1.2} Computer
Systems Organization, PROCESSOR ARCHITECTURES, Multiple
Data Stream Architectures (Multiprocessors), Parallel
processors**. {\bf D.4.4} Software, OPERATING SYSTEMS,
Communications Management, Message sending.",
}
@Article{Ramakrishnan:1990:BFS,
author = "K. K. Ramakrishnan and R. Jain",
title = "A Binary Feedback Scheme for Congestion Avoidance in
Computer Networks",
journal = j-TOCS,
volume = "8",
number = "2",
pages = "158--181",
month = may,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-2/p158-ramakrishnan/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; performance",
subject = "{\bf C.2.1} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Architecture
and Design, Network communications. {\bf C.2.3}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Network Operations, Network monitoring. {\bf
C.2.2} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Protocols.",
}
@Article{Benson:1990:FPM,
author = "Glenn S. Benson and Ian F. Akyildiz and William F.
Appelbe",
title = "A Formal Protection Model of Security in Centralized,
Parallel, and Distributed Systems",
journal = j-TOCS,
volume = "8",
number = "3",
pages = "183--213",
month = aug,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-3/p183-benson/",
abstract = "One way to show that a system is not secure is to
demonstrate that a malicious or mistake-prone user or
program can break security by causing the system to
reach a nonsecure state. A fundamental aspect of a
security model is a proof that validates that every
state reachable from a secure initial state is secure.
A sequential security model assumes that every command
that acts as a state transition executes sequentially,
while a concurrent security model assumes that multiple
commands execute concurrently. This paper presents a
security model called the
Centralized-Parallel-Distributed model (CPD model) that
defines security for logically, or physically
centralized, parallel, and distributed systems. The
purpose of the CPD model is to define concurrency
conditions that guarantee that a concurrent system
cannot reach a state in which privileges are configured
in a nonsecure manner. As an example, the conditions
are used to construct a representation of a distributed
system.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; security; theory; verification",
subject = "{\bf C.2.0} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, General, Security and
protection (e.g., firewalls). {\bf C.1.2} Computer
Systems Organization, PROCESSOR ARCHITECTURES, Multiple
Data Stream Architectures (Multiprocessors), Parallel
processors**. {\bf C.2.4} Computer Systems
Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems. {\bf D.4.1} Software, OPERATING
SYSTEMS, Process Management, Concurrency. {\bf D.4.1}
Software, OPERATING SYSTEMS, Process Management,
Scheduling. {\bf F.3.1} Theory of Computation, LOGICS
AND MEANINGS OF PROGRAMS, Specifying and Verifying and
Reasoning about Programs. {\bf D.4.6} Software,
OPERATING SYSTEMS, Security and Protection, Access
controls.",
}
@Article{King:1990:DAM,
author = "Richard P. King",
title = "Disk Arm Movement in Anticipation of Future Requests",
journal = j-TOCS,
volume = "8",
number = "3",
pages = "214--229",
month = aug,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-3/p214-king/",
abstract = "When a disk drive's access arm is idle, it may not be
at the ideal location. In anticipation of future
requests, movement to some other location may be
advantageous. The effectiveness of anticipatory disk
arm movement is explored. Various operating conditions
are considered, and the reduction in seek distances and
request response times is determined for them. Suppose
that successive requests are independent and uniformly
distributed. By bringing the arm to the middle of its
range of motion when it is idle, the expected seek
distance can be reduced by 25 percent. Nonlinearity in
time versus distance can whittle that 25 percent
reduction down to a 13 percent reduction in seek time.
Nonuniformity in request location, non-Poisson arrival
processes, and high arrival rates can whittle the
reduction down to nothing. However, techniques are
discussed that maximize those savings that are still
possible under those circumstances. Various systems
with multiple arms are analyzed. Usually, it is best to
spread out the arms over the disk area. The both arms
should be brought to the middle.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; performance",
subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
Management, Secondary storage. {\bf D.4.8} Software,
OPERATING SYSTEMS, Performance, Modeling and
prediction. {\bf D.4.8} Software, OPERATING SYSTEMS,
Performance, Simulation.",
}
@Article{Mitchell:1990:EPA,
author = "Chad L. Mitchell and Michael J. Flynn",
title = "The Effects of Processor Architecture on Instruction
Memory Traffic",
journal = j-TOCS,
volume = "8",
number = "3",
pages = "230--250",
month = aug,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-3/p230-mitchell/",
abstract = "The relative amount of instruction traffic for two
architectures is about the same in the presence of a
large cache as with no cache. Furthermore, the presence
of an intermediate-sized cache probably substantially
favors the denser architecture. Encoding techniques
have a much greater impact on instruction traffic than
do the differences between instruction set families
such as stack and register set. However, register set
architectures have somewhat lower instruction traffic
than directly comparable stack architectures if some
local variables are allocated in registers. This study
has clearly indicated that cache factors should be
taken into consideration when making architectural
tradeoffs. The differences in memory traffic between
two architectures may be greatly amplified in the
presence of a cache.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; performance",
subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
Styles, Cache memories. {\bf C.0} Computer Systems
Organization, GENERAL, Instruction set design. {\bf
C.4} Computer Systems Organization, PERFORMANCE OF
SYSTEMS, Performance attributes. {\bf B.3.3} Hardware,
MEMORY STRUCTURES, Performance Analysis and Design
Aids**, Simulation**.",
}
@Article{Gotzhein:1990:DPS,
author = "Reinhard Gotzhein and Gregor von Bochmann",
title = "Deriving Protocol Specifications from Service
Specifications Including Parameters",
journal = j-TOCS,
volume = "8",
number = "4",
pages = "255--283",
month = nov,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-4/p255-gotzhein/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; verification",
subject = "{\bf C.2.2} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
Protocol architecture. {\bf C.2.4} Computer Systems
Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems.",
}
@Article{Marzullo:1990:TFC,
author = "Keith Marzullo",
title = "Tolerating Failures of Continuous-Valued Sensors",
journal = j-TOCS,
volume = "8",
number = "4",
pages = "284--304",
month = nov,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-4/p284-marzullo/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; performance",
subject = "{\bf C.3} Computer Systems Organization,
SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS, Process
control systems. {\bf F.3.1} Theory of Computation,
LOGICS AND MEANINGS OF PROGRAMS, Specifying and
Verifying and Reasoning about Programs.",
}
@Article{Lamport:1990:CRW,
author = "Leslie Lamport",
title = "Concurrent Reading and Writing of Clocks",
journal = j-TOCS,
volume = "8",
number = "4",
pages = "305--310",
month = nov,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-4/p305-lamport/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; verification",
subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management, Concurrency. {\bf D.1.3} Software,
PROGRAMMING TECHNIQUES, Concurrent Programming. {\bf
D.2.4} Software, SOFTWARE ENGINEERING, Software/Program
Verification. {\bf D.4.1} Software, OPERATING SYSTEMS,
Process Management.",
}
@Article{Goldszmidt:1990:HLL,
author = "German S. Goldszmidt and Shaula Yemini",
title = "High-level Language Debugging for Concurrent
Programs",
journal = j-TOCS,
volume = "8",
number = "4",
pages = "311--336",
month = nov,
year = "1990",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1990-8-4/p311-goldszmidt/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "experimentation; verification",
subject = "{\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing
and Debugging, Debugging aids. {\bf D.1.3} Software,
PROGRAMMING TECHNIQUES, Concurrent Programming.",
}
@Article{Agrawal:1991:EFT,
author = "Divyakant Agrawal and Amr {El Abbadi}",
title = "An Efficient and Fault-Tolerant Solution for
Distributed Mutual Exclusion",
journal = j-TOCS,
volume = "9",
number = "1",
pages = "1--20",
month = feb,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-1/p1-agrawal/",
abstract = "In this paper, we present an efficient and
fault-tolerant algorithm for generating quorums to
solve the distributed mutual exclusion problem. The
algorithm uses a logical tree organization of the
network to generate tree quorums, which are logarithmic
in the size of the network in the best case. Our
approach is resilient to both site and communication
failures, even when such failures lead to network
partitioning. Furthermore, the algorithm exhibits a
property of graceful degradation, i.e., it requires
more messages only as the number of failures increase
in the network. We describe how tree quorums can be
used for various distributed applications for providing
mutually exclusive access to a distributed resource,
managing replicated objects, and atomically committing
a distributed transaction.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; reliability",
subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management, Mutual exclusion. {\bf D.4.5} Software,
OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf
D.4.7} Software, OPERATING SYSTEMS, Organization and
Design, Distributed systems. {\bf C.2.4} Computer
Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems.",
}
@Article{Mellor-Crummey:1991:ASS,
author = "John M. Mellor-Crummey and Michael L. Scott",
title = "Algorithms for Scalable Synchronization on
Shared-Memory Multiprocessors",
journal = j-TOCS,
volume = "9",
number = "1",
pages = "21--65",
month = feb,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-1/p21-mellor-crummey/",
abstract = "Busy-wait techniques are heavily used for mutual
exclusion and barrier synchronization in shared-memory
parallel programs. Unfortunately, typical
implementations of busy-waiting tend to produce large
amounts of memory and interconnect contention,
introducing performance bottlenecks that become
markedly more pronounced as applications scale. We
argue that this problem is not fundamental, and that
one can in fact construct busy-wait synchronization
algorithms that induce no memory or interconnect
contention. The key to these algorithms is for every
processor to spin on separate locally-accessible flag
variables, and for some other processor to terminate
the spin with a single remote write operation at an
appropriate time. Flag variables may be
locally-accessible as a result of coherent caching, or
by virtue of allocation in the local portion of
physically distributed shared memory. We present a new
scalable algorithm for spin locks that generates $O(1)$
remote references per lock acquisition, independent of
the number of processors attempting to acquire the
lock. Our algorithm provides reasonable latency in the
absence of contention, requires only a constant amount
of space per lock, and requires no hardware support
other than a swap-with-memory instruction. We also
present a new scalable barrier algorithm that generates
$O(1)$ remote references per processor reaching the
barrier, and observe that two previously-known barriers
can likewise be cast in a form that spins only on
locally-accessible flag variables. None of these
barrier algorithms requires hardware support beyond the
usual atomicity of memory reads and writes. We compare
the performance of our scalable algorithms with other
software approaches to busy-wait synchronization on
both a Sequent Symmetry and a BBN Butterfly. Our
principal conclusion is that contention due to
synchronization need not be a problem in large-scale
shared-memory multiprocessors. The existence of
scalable algorithms greatly weakens the case for costly
special-purpose hardware support for synchronization,
and provides a case against so-called ``dance hall''
architectures, in which shared memory locations are
equally far from all processors. ---From the Authors'
Abstract",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; measurement; performance",
subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management, Synchronization. {\bf B.3.2} Hardware,
MEMORY STRUCTURES, Design Styles, Shared memory. {\bf
D.4.1} Software, OPERATING SYSTEMS, Process Management,
Mutual exclusion. {\bf C.1.2} Computer Systems
Organization, PROCESSOR ARCHITECTURES, Multiple Data
Stream Architectures (Multiprocessors), Interconnection
architectures. {\bf D.4.2} Software, OPERATING SYSTEMS,
Storage Management, Storage hierarchies. {\bf D.4.8}
Software, OPERATING SYSTEMS, Performance, Measurements.
{\bf C.4} Computer Systems Organization, PERFORMANCE OF
SYSTEMS, Design studies.",
}
@Article{Huguet:1991:ASR,
author = "Miquel Huguet and Tom{\'a}s Lang",
title = "Architectural Support for Reduced Register
Saving\slash Restoring in Single-Window Register
Files",
journal = j-TOCS,
volume = "9",
number = "1",
pages = "66--97",
month = feb,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-1/p66-huguet/",
abstract = "The use of registers in a processor reduces the data
and instruction memory traffic. Since this reduction is
a significant factor in the improvement of the program
execution time, recent VLSI processors have a large
number of registers which can be used efficiently
because of the advances in compiler technology.
However, since registers have to be saved/restored
across function calls, the corresponding register
saving and restoring (RSR) memory traffic can almost
eliminate the overall reduction. This traffic has been
reduced by compiler optimizations and by providing
multiple-window register files. Although these
multiple-window architectures produce a large reduction
in the RSR traffic, they have several drawbacks which
make the single-window file preferable. We consider a
combination of {\em hardware support\/} and {\em
compiler optimizations\/} to reduce the RSR traffic for
a single-window register file, beyond the reductions
achieved by compiler optimizations alone. Basically,
this hardware keeps track of the registers that are
written during execution, so that the number of
registers saved is minimized. Moreover, hardware is
added so that a register is saved in the activation
record of the function that uses it (instead of in the
record of the current function); in this way a register
is restored only when it is needed, rather than
wholesale on procedure return. We present a register
saving and restoring policy that makes use of this
hardware, discuss its implementation, and evaluate the
traffic reduction when the policy is combined with
intraprocedural and interprocedural compiler
optimizations. We show that, on the average for the
four general-purpose programs measured, the RSR traffic
is reduced by about 90 percent for a small register
file (i.e., 32 registers), which results in an overall
data memory traffic reduction of about 15 percent.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; languages; performance",
subject = "{\bf B.5.2} Hardware, REGISTER-TRANSFER-LEVEL
IMPLEMENTATION, Design Aids, Optimization. {\bf B.5.1}
Hardware, REGISTER-TRANSFER-LEVEL IMPLEMENTATION,
Design, Data-path design. {\bf B.7.1} Hardware,
INTEGRATED CIRCUITS, Types and Design Styles, VLSI
(very large scale integration). {\bf B.1.4} Hardware,
CONTROL STRUCTURES AND MICROPROGRAMMING, Microprogram
Design Aids, Languages and compilers.",
}
@Article{Zhang:1991:VNT,
author = "Lixia Zhang",
title = "{VirtualClock}: a New Traffic Control Algorithm for
Packet-Switched Networks",
journal = j-TOCS,
volume = "9",
number = "2",
pages = "101--124",
month = may,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-2/p101-zhang/",
abstract = "One of the challenging research issues in building
high-speed packet-switched networks is how to control
the transmission rate of statistical data flows. This
paper describes a new traffic control algorithm, {\em
VirtualClock}, for high-speed network applications.
VirtualClock monitors the average transmission rate of
statistical data flows and provides every flow with
guaranteed throughput and low queueing delay. It
provides firewall protection among individual flows, as
in a TDM system, while retaining the statistical
multiplexing advantages of packet switching. Simulation
results show that the VirtualClock algorithm meets all
its design goals.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; performance",
subject = "{\bf C.2.1} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Architecture
and Design, Packet-switching networks. {\bf C.2.2}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Network Protocols, Protocol architecture.
{\bf C.4} Computer Systems Organization, PERFORMANCE OF
SYSTEMS, Performance attributes.",
}
@Article{Liskov:1991:EMO,
author = "Barbara Liskov and Liuba Shrira and John Wroclawski",
title = "Efficient At-Most-Once Messages Based on Synchronized
Clocks",
journal = j-TOCS,
volume = "9",
number = "2",
pages = "125--142",
month = may,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-2/p125-liskov/",
abstract = "This paper describes a new at-most-once message
passing protocol that provides guaranteed detection of
duplicate messages even when the receiver has no state
stored for the sender. It also discusses how to use
at-most-once messages to implement higher-level
primitives such as at-most-once remote procedure calls and
sequenced bytestream protocols. Our performance
measurements indicate that at-most-once RPCs can
be provided at the same cost as less desirable forms of
RPCs that do not guarantee at-most-once execution. Our
method is based on the assumption that clocks
throughout the system are loosely synchronized. Modern
clock synchronization protocols provide good bounds on
clock skew with high probability; our method depends on
the bound for performance but not for correctness.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design",
subject = "{\bf C.2.2} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
Protocol architecture. {\bf D.4.4} Software, OPERATING
SYSTEMS, Communications Management, Message sending.
{\bf C.4} Computer Systems Organization, PERFORMANCE OF
SYSTEMS, Performance attributes. {\bf C.4} Computer
Systems Organization, PERFORMANCE OF SYSTEMS,
Measurement techniques.",
}
@Article{Bihari:1991:DAR,
author = "Thomas E. Bihari and Karsten Schwan",
title = "Dynamic Adaptation of Real-Time Software",
journal = j-TOCS,
volume = "9",
number = "2",
pages = "143--174",
month = may,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-2/p143-bihari/",
abstract = "In large, dynamic, real-time computer systems, it is
frequently most cost effective to employ different
software performance and reliability techniques at
different levels of granularity, at different times, or
within different subsystems. These techniques may
include regulation of redundancy and resource
allocation, multiversion and multipath execution,
adjustments of program attributes such as time-out
periods and others. The management of software in such
systems is a difficult task. Software that may be
adapted to meet varying performance and reliability
requirements offers a solution. A REal-time Software
Adaptation System (RESAS) includes a uniform model of
adaptable software and provides the tool necessary for
programmers to implement algorithms that choose and
enact adaptations in real time. RESAS has been
implemented on a testbed consisting of a multiprocessor
and an attached workstation, and adaptation algorithms
have been developed that address the problem of
adapting software to achieve two goals: software
execution within specified time constraints and
software resiliency with respect to computer hardware
failures.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; performance; reliability",
subject = "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization
and Design, Real-time systems and embedded systems.
{\bf C.3} Computer Systems Organization,
SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS,
Real-time and embedded systems. {\bf D.4.8} Software,
OPERATING SYSTEMS, Performance, Measurements. {\bf
D.4.6} Software, OPERATING SYSTEMS, Security and
Protection. {\bf D.4.1} Software, OPERATING SYSTEMS,
Process Management.",
}
@Article{Bershad:1991:ULI,
author = "Brian N. Bershad and Thomas E. Anderson and Edward D.
Lazowska and Henry M. Levy",
title = "User-level Interprocess Communication for Shared
Memory Multiprocessors",
journal = j-TOCS,
volume = "9",
number = "2",
pages = "175--198",
month = may,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-2/p175-bershad/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; performance",
subject = "{\bf D.4.4} Software, OPERATING SYSTEMS,
Communications Management. {\bf D.4.1} Software,
OPERATING SYSTEMS, Process Management,
Multiprocessing/multiprogramming/multitasking. {\bf
D.4.2} Software, OPERATING SYSTEMS, Storage Management.
{\bf C.1.2} Computer Systems Organization, PROCESSOR
ARCHITECTURES, Multiple Data Stream Architectures
(Multiprocessors). {\bf B.3.2} Hardware, MEMORY
STRUCTURES, Design Styles, Shared memory.",
}
@Article{Greenberg:1991:AUP,
author = "Albert G. Greenberg and Boris D. Lubachevsky and Isi
Mitrani",
title = "Algorithms for Unboundedly Parallel Simulations",
journal = j-TOCS,
volume = "9",
number = "3",
pages = "201--221",
month = aug,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-3/p201-greenberg/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; performance",
subject = "{\bf I.6.8} Computing Methodologies, SIMULATION AND
MODELING, Types of Simulation, Parallel. {\bf C.1.2}
Computer Systems Organization, PROCESSOR ARCHITECTURES,
Multiple Data Stream Architectures (Multiprocessors).
{\bf F.1.2} Theory of Computation, COMPUTATION BY
ABSTRACT DEVICES, Modes of Computation. {\bf I.6.8}
Computing Methodologies, SIMULATION AND MODELING, Types
of Simulation.",
}
@Article{Wang:1991:ETD,
author = "Wen-Hann Wang and Jean-Loup Baer",
title = "Efficient Trace-Driven Simulation Methods for Cache
Performance Analysis",
journal = j-TOCS,
volume = "9",
number = "3",
pages = "222--241",
month = aug,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-3/p222-wang/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; measurement; performance",
subject = "{\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance
Analysis and Design Aids**, Simulation**. {\bf B.3.2}
Hardware, MEMORY STRUCTURES, Design Styles. {\bf B.3.3}
Hardware, MEMORY STRUCTURES, Performance Analysis and
Design Aids**.",
}
@Article{Garcia-Molina:1991:ORM,
author = "H{\'e}ctor Garc{\'\i}a-Molina and Annemarie Spauster",
title = "Ordered and Reliable Multicast Communication",
journal = j-TOCS,
volume = "9",
number = "3",
pages = "242--271",
month = aug,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-3/p242-garcia-molina/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; reliability",
subject = "{\bf C.2.1} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Architecture
and Design, Network communications. {\bf C.2.1}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Network Architecture and Design. {\bf C.2.2}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Network Protocols. {\bf C.2.4} Computer
Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems. {\bf D.4.1} Software, OPERATING
SYSTEMS, Process Management. {\bf D.4.4} Software,
OPERATING SYSTEMS, Communications Management. {\bf
H.2.4} Information Systems, DATABASE MANAGEMENT,
Systems.",
}
@Article{Schiper:1991:LCA,
author = "Andr{\'e} Schiper and Kenneth Birman and Pat
Stephenson",
title = "Lightweight Causal and Atomic Group Multicast",
journal = j-TOCS,
volume = "9",
number = "3",
pages = "272--314",
month = aug,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-3/p272-schiper/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; reliability",
subject = "{\bf C.2.1} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Architecture
and Design, Network communications. {\bf C.2.1}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Network Architecture and Design. {\bf C.2.2}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Network Protocols. {\bf C.2.4} Computer
Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems. {\bf D.4.1} Software, OPERATING
SYSTEMS, Process Management. {\bf D.4.4} Software,
OPERATING SYSTEMS, Communications Management. {\bf
D.4.7} Software, OPERATING SYSTEMS, Organization and
Design.",
}
@Article{Larowe:1991:ECM,
author = "Richard P. {LaRowe, Jr.} and Carla Schlatter Ellis",
title = "Experimental Comparison of Memory Management Policies
for {NUMA} Multiprocessors",
journal = j-TOCS,
volume = "9",
number = "4",
pages = "319--363",
month = nov,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Fri Oct 31 06:27:19 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-4/p319-larowe/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "experimentation; management; measurement;
performance",
subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
Management. {\bf B.3.2} Hardware, MEMORY STRUCTURES,
Design Styles, Shared memory. {\bf C.1.2} Computer
Systems Organization, PROCESSOR ARCHITECTURES, Multiple
Data Stream Architectures (Multiprocessors),
Multiple-instruction-stream, multiple-data-stream
processors (MIMD). {\bf D.4.8} Software, OPERATING
SYSTEMS, Performance.",
}
@Article{Karn:1991:IRT,
author = "Phil Karn and Craig Partridge",
title = "Improving Round-Trip Time Estimates in Reliable
Transport Protocols",
journal = j-TOCS,
volume = "9",
number = "4",
pages = "364--373",
month = nov,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-4/p364-karn/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; performance; reliability",
subject = "{\bf C.2.2} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
Protocol verification. {\bf C.2.1} Computer Systems
Organization, COMPUTER-COMMUNICATION NETWORKS, Network
Architecture and Design, Packet-switching networks.
{\bf C.2.1} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Architecture
and Design, Store and forward networks. {\bf D.4.4}
Software, OPERATING SYSTEMS, Communications Management,
Message sending. {\bf D.4.4} Software, OPERATING
SYSTEMS, Communications Management, Network
communication.",
}
@Article{Kandlur:1991:RBA,
author = "Dilip D. Kandlur and Kang G. Shin",
title = "Reliable Broadcast Algorithms for {HARTS}",
journal = j-TOCS,
volume = "9",
number = "4",
pages = "374--398",
month = nov,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-4/p374-kandlur/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; performance; reliability",
subject = "{\bf C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
HARTS. {\bf C.2.2} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Protocols.",
}
@Article{Ahamad:1991:MV,
author = "Mustaque Ahamad and Mostafa H. Ammar and Shun Yan
Cheung",
title = "Multidimensional Voting",
journal = j-TOCS,
volume = "9",
number = "4",
pages = "399--431",
month = nov,
year = "1991",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1991-9-4/p399-ahamad/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; reliability; theory",
subject = "{\bf B.4.5} Hardware, INPUT/OUTPUT AND DATA
COMMUNICATIONS, Reliability, Testing, and
Fault-Tolerance**, Redundant design**. {\bf C.2.2}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Network Protocols. {\bf C.2.4} Computer
Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems.",
}
@Article{Kistler:1992:DOC,
author = "James J. Kistler and M. Satyanarayanan",
title = "Disconnected Operation in the {Coda File System}",
journal = j-TOCS,
volume = "10",
number = "1",
pages = "3--25",
month = feb,
year = "1992",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-1/p3-kistler/",
abstract = "{\em Disconnected operation\/} is a mode of operation
that enables a client to continue accessing critical
data during temporary failures of a shared data
repository. An important, though not exclusive,
application of disconnected operation is in supporting
portable computers. In this paper, we show that
disconnected operation is feasible, efficient and
usable by describing its design and implementation in
the Coda File System. The central idea behind our work
is that {\em caching of data}, now widely used for
performance, can also be exploited to improve {\em
availability.\/}",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; experimentation; measurement; performance;
reliability",
subject = "{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability,
Fault-tolerance. {\bf D.4.4} Software, OPERATING
SYSTEMS, Communications Management. {\bf D.4.3}
Software, OPERATING SYSTEMS, File Systems Management,
Distributed file systems. {\bf D.4.8} Software,
OPERATING SYSTEMS, Performance, Measurements.",
}
@Article{Rosenblum:1992:DIL,
author = "Mendel Rosenblum and John K. Ousterhout",
title = "The Design and Implementation of a Log-Structured File
System",
journal = j-TOCS,
volume = "10",
number = "1",
pages = "26--52",
month = feb,
year = "1992",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-1/p26-rosenblum/",
abstract = "This paper presents a new technique for disk storage
management called a {\em log-structured file system}. A
log-structured file system writes all modifications to
disk sequentially in a log-like structure, thereby
speeding up both file writing and crash recovery. The
log is the only structure on disk; it contains indexing
information so that files can be read back from the log
efficiently. In order to maintain large free areas on
disk for fast writing, we divide the log into {\em
segments\/} and use a {\em segment cleaner\/} to
compress the live information from heavily fragmented
segments. We present a series of simulations that
demonstrate the efficiency of a simple cleaning policy
based on cost and benefit. We have implemented a
prototype log-structured file system called Sprite LFS;
it outperforms current Unix file systems by an order of
magnitude for small-file writes while matching or
exceeding Unix performance for reads and large writes.
Even when the overhead for cleaning is included, Sprite
LFS can use 70\% of the disk bandwidth for writing,
whereas Unix file systems typically can use only
5--10\%.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; measurement; performance",
subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
Management, Secondary storage. {\bf D.4.2} Software,
OPERATING SYSTEMS, Storage Management,
Allocation/deallocation strategies. {\bf D.4.5}
Software, OPERATING SYSTEMS, Reliability,
Checkpoint/restart. {\bf D.4.8} Software, OPERATING
SYSTEMS, Performance, Measurements. {\bf D.4.8}
Software, OPERATING SYSTEMS, Performance, Simulation.
{\bf D.4.8} Software, OPERATING SYSTEMS, Performance,
Operational analysis. {\bf H.2.2} Information Systems,
DATABASE MANAGEMENT, Physical Design, Recovery and
restart. {\bf H.3.2} Information Systems, INFORMATION
STORAGE AND RETRIEVAL, Information Storage, File
organization.",
}
@Article{Anderson:1992:SAE,
author = "Thomas E. Anderson and Brian N. Bershad and Edward D.
Lazowska and Henry M. Levy",
title = "Scheduler Activations: Effective Kernel Support for
the User-Level Management of Parallelism",
journal = j-TOCS,
volume = "10",
number = "1",
pages = "53--79",
month = feb,
year = "1992",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-1/p53-anderson/",
abstract = "{\em Threads\/} are the vehicle for concurrency in
many approaches to parallel programming. Threads can be
supported either by the operating system kernel or by
user-level library code in the application address
space, but neither approach has been fully
satisfactory. This paper addresses this dilemma. First,
we argue that the performance of kernel threads is {\em
inherently\/} worse than that of user-level threads,
rather than this being an artifact of existing
implementations; managing parallelism at the user level
is essential to high-performance parallel computing.
Next, we argue that the problems encountered in
integrating user-level threads with other system
services is a consequence of the lack of kernel support
for user-level threads provided by contemporary
multiprocessor operating systems; kernel threads are
the {\em wrong abstraction\/} on which to support
user-level management of parallelism. Finally, we
describe the design, implementation, and performance of
a new kernel interface and user-level thread package
that together provide the same functionality as kernel
threads without compromising the performance and
flexibility advantages of user-level management of
parallelism.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; measurement; performance",
subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management, Scheduling. {\bf D.4.4} Software, OPERATING
SYSTEMS, Communications Management, Input/output. {\bf
D.4.7} Software, OPERATING SYSTEMS, Organization and
Design. {\bf D.4.8} Software, OPERATING SYSTEMS,
Performance.",
}
@Article{Mogul:1992:NLS,
  author =       "Jeffrey C. Mogul",
  title =        "Network Locality at the Scale of Processes",
  journal =      j-TOCS,
  volume =       "10",
  number =       "2",
  pages =        "81--109",
  month =        may,
  year =         "1992",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-2/p81-mogul/",
  abstract =     "Packets on a LAN can be viewed as a series of
                 references to and from the objects they address. The
                 amount of locality in this reference stream may be
                 critical to the efficiency of network implementations,
                 if the locality can be exploited through caching or
                 scheduling mechanisms. Most previous studies have
                 treated network locality with an addressing granularity
                 of networks or individual hosts. This paper describes
                 some experiments tracing locality at a finer grain,
                 looking at references to individual processes, and with
                 fine-grained time resolution. Observations of typical
                 LANs show high per-process locality; that is, packets
                 to a host usually arrive for the process that most
                 recently sent a packet, and often with little
                 intervening delay.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "algorithms; design; measurement; performance",
  subject =      "{\bf C.2.1} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Network Architecture
                 and Design, Packet-switching networks. {\bf C.4}
                 Computer Systems Organization, PERFORMANCE OF SYSTEMS,
                 Measurement techniques. {\bf C.4} Computer Systems
                 Organization, PERFORMANCE OF SYSTEMS, Performance
                 attributes. {\bf C.2.5} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Local and Wide-Area
                 Networks. {\bf C.2.2} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
                 TCP/IP.",
}
@Article{OMalley:1992:DNA,
  author =       "Sean W. O'Malley and Larry L. Peterson",
  title =        "A Dynamic Network Architecture",
  journal =      j-TOCS,
  volume =       "10",
  number =       "2",
  pages =        "110--143",
  month =        may,
  year =         "1992",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-2/p110-o_malley/",
  abstract =     "Network software is a critical component of any
                 distributed system. Because of its complexity, network
                 software is commonly layered into a hierarchy of
                 protocols, or more generally, into a {\em protocol
                 graph}. Typical protocol graphs---including those
                 standardized in the ISO and TCP/IP network
                 architectures---share three important properties; the
                 protocol graph is simple, the nodes of the graph
                 (protocols) encapsulate complex functionality, and the
                 topology of the graph is relatively static. This paper
                 describes a new way to organize network software that
                 differs from conventional architectures in all three of
                 these properties. In our approach, the protocol graph
                 is complex, individual protocols encapsulate a single
                 function, and the topology of the graph is dynamic. The
                 main contribution of this paper is to describe the
                 ideas behind our new architecture, illustrate the
                 advantages of using the architecture, and demonstrate
                 that the architecture results in efficient network
                 software.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; performance",
  subject =      "{\bf C.2.2} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
                 Protocol architecture. {\bf C.2.1} Computer Systems
                 Organization, COMPUTER-COMMUNICATION NETWORKS, Network
                 Architecture and Design, Network communications.",
}
@Article{Ramanathan:1992:DTC,
  author =       "Parameswaran Ramanathan and Kang G. Shin",
  title =        "Delivery of Time-Critical Messages using a Multiple
                 Copy Approach",
  journal =      j-TOCS,
  volume =       "10",
  number =       "2",
  pages =        "144--166",
  month =        may,
  year =         "1992",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-2/p144-ramanathan/",
  abstract =     "Reliable and timely delivery of messages between
                 processing nodes is essential in distributed real-time
                 systems. Failure to deliver a message within its
                 deadline usually forces the system to undertake a
                 recovery action, which introduces some cost (or
                 overhead) to the system. This recovery cost can be very
                 high, especially when the recovery action fails due to
                 lack of time or resources. Proposed in this paper is a
                 scheme to minimize the expected cost incurred as a
                 result of messages failing to meet their deadlines. The
                 scheme is intended for distributed real-time systems,
                 especially with a point-to-point interconnection
                 topology. The goal of minimizing the expected cost is
                 achieved by sending multiple copies of a message
                 through disjoint routes and thus increasing the
                 probability of successful message delivery within the
                 deadline. However, as the number of copies increases,
                 the message traffic on the network increases, thereby
                 increasing the delivery time for each of the copies.
                 There is therefore a tradeoff between the number of
                 copies of each message and the expected cost incurred
                 as a result of messages missing their deadlines. The
                 number of copies of each message to be sent is
                 determined by optimizing this tradeoff. Simulation
                 results for a hexagonal mesh and a hypercube topology
                 indicate that the expected cost can be lowered
                 substantially by the proposed scheme.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; measurement; performance; reliability",
  subject =      "{\bf C.4} Computer Systems Organization, PERFORMANCE
                 OF SYSTEMS, Reliability, availability, and
                 serviceability. {\bf C.2.4} Computer Systems
                 Organization, COMPUTER-COMMUNICATION NETWORKS,
                 Distributed Systems, Distributed applications. {\bf
                 C.3} Computer Systems Organization, SPECIAL-PURPOSE AND
                 APPLICATION-BASED SYSTEMS, Real-time and embedded
                 systems.",
}
@Article{Hsu:1992:ESN,
  author =       "William Tsun-Yuk Hsu and Pen-Chung Yew",
  title =        "An Effective Synchronization Network for Hot-Spot
                 Accesses",
  journal =      j-TOCS,
  volume =       "10",
  number =       "3",
  pages =        "167--189",
  month =        aug,
  year =         "1992",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-3/p167-hsu/",
  abstract =     "In large multiprocessor systems, fast synchronization
                 is crucial for high performance. However,
                 synchronization traffic tends to create ``hot-spots''
                 in shared memory and cause network congestion.
                 Multistage shuffle-exchange networks have been proposed
                 and built to handle synchronization traffic. Software
                 combining schemes have also been proposed to relieve
                 network congestion caused by hot-spots. However,
                 multistage combining networks could be very expensive
                 and software combining could be very slow. In this
                 paper, we propose a single-stage combining network to
                 handle synchronization traffic, which is separated from
                 the regular memory traffic. A single-stage combining
                 network has several advantages: (1) it is attractive
                 from an implementation perspective because only one
                 stage is needed (instead of log {\em N\/} stages); (2)
                 only one network is needed to handle both forward and
                 returning requests; (3) combined requests are
                 distributed evenly through the network---the wait
                 buffer size is reduced; and (4) fast-finishing
                 algorithms [30] can be used to shorten the network
                 delay. Because of all these advantages, we show that a
                 single-stage combining network gives good performance
                 at a lower cost than a multistage combining network.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "algorithms; design; performance",
  subject =      "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management, Synchronization. {\bf C.1.2} Computer
                 Systems Organization, PROCESSOR ARCHITECTURES, Multiple
                 Data Stream Architectures (Multiprocessors),
                 Interconnection architectures. {\bf D.4.7} Software,
                 OPERATING SYSTEMS, Organization and Design, Distributed
                 systems.",
}
@Article{Atkins:1992:ACC,
  author =       "M. S. Atkins and M. Y. Coady",
  title =        "Adaptable Concurrency Control for Atomic Data Types",
  journal =      j-TOCS,
  volume =       "10",
  number =       "3",
  pages =        "190--225",
  month =        aug,
  year =         "1992",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-3/p190-atkins/",
  abstract =     "In many distributed systems concurrent access is
                 required to a shared object, where abstract object
                 servers may incorporate type-specific properties to
                 define consistency requirements. Each operation and its
                 outcome is treated as an event, and conflicts may occur
                 between different event types. Hence concurrency
                 control and synchronization are required at the
                 granularity of conflicting event types. With such a
                 fine granularity of locking, the occurrence of
                 conflicts is likely to be lower than with whole-object
                 locking, so optimistic techniques become more
                 attractive. This work describes the design,
                 implementation, and performance of servers for a shared
                 atomic object, a semiqueue, where each server employs
                 either pessimistic or optimistic locking techniques on
                 each conflicting event type. We compare the performance
                 of a purely optimistic server, a purely pessimistic
                 server, and a hybrid server which treats certain event
                 types optimistically and others pessimistically, to
                 demonstrate the most appropriate environment for using
                 pessimistic, optimistic, or hybrid control. We show
                 that the advantages of low overhead on optimistic
                 locking at low conflict levels is offset at higher
                 conflict levels by the wasted work done by aborted
                 transactions. To achieve optimum performance over the
                 whole range of conflict levels, an adaptable server is
                 required, whereby the treatment of conflicting event
                 types can be changed dynamically between optimistic and
                 pessimistic, according to various criteria depending on
                 the expected frequency of conflict. We describe our
                 implementations of adaptable servers which may allocate
                 concurrency control strategy on the basis of state
                 information, the history of conflicts encountered, or
                 by using preset transaction priorities. We show that
                 the adaptable servers perform almost as well as the
                 best of the purely optimistic, pessimistic, or hybrid
                 servers under the whole range of conflict levels,
                 showing the versatility and efficiency of the dynamic
                 servers. Finally we outline a general design
                 methodology for implementing adaptable concurrency
                 control in servers for atomic objects, illustrated
                 using an atomic shared B-tree.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; performance",
  subject =      "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management, Concurrency. {\bf D.1.3} Software,
                 PROGRAMMING TECHNIQUES, Concurrent Programming,
                 Distributed programming. {\bf D.3.3} Software,
                 PROGRAMMING LANGUAGES, Language Constructs and
                 Features, Abstract data types.
                 {\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management, Deadlocks. {\bf D.4.1} Software, OPERATING
                 SYSTEMS, Process Management, Mutual exclusion. {\bf
                 D.4.1} Software, OPERATING SYSTEMS, Process Management,
                 Synchronization. {\bf D.4.8} Software, OPERATING
                 SYSTEMS, Performance, Measurements. {\bf D.4.8}
                 Software, OPERATING SYSTEMS, Performance, Simulation.
                 {\bf H.2.4} Information Systems, DATABASE MANAGEMENT,
                 Systems, Concurrency. {\bf H.2.4} Information Systems,
                 DATABASE MANAGEMENT, Systems, Transaction processing.",
}
@Article{Glasgow:1992:LRA,
  author =       "Janice Glasgow and Glenn MacEwen and Prakash
                 Panangaden",
  title =        "A Logic for Reasoning about Security",
  journal =      j-TOCS,
  volume =       "10",
  number =       "3",
  pages =        "226--264",
  month =        aug,
  year =         "1992",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-3/p226-glasgow/",
  abstract =     "A formal framework called {\em Security Logic\/} ({\em
                 SL\/}) is developed for specifying and reasoning about
                 security policies and for verifying that system designs
                 adhere to such policies. Included in this modal logic
                 framework are definitions of {\em knowledge}, {\em
                 permission}, and {\em obligation}. Permission is used
                 to specify secrecy policies and obligation to specify
                 integrity policies. The combination of policies is
                 addressed and examples based on policies from the
                 current literature are given.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "security; theory",
  subject =      "{\bf F.4.1} Theory of Computation, MATHEMATICAL LOGIC
                 AND FORMAL LANGUAGES, Mathematical Logic. {\bf H.2.0}
                 Information Systems, DATABASE MANAGEMENT, General. {\bf
                 K.6.5} Computing Milieux, MANAGEMENT OF COMPUTING AND
                 INFORMATION SYSTEMS, Security and Protection. {\bf
                 D.4.6} Software, OPERATING SYSTEMS, Security and
                 Protection.",
}
@Article{Lampson:1992:ADS,
  author =       "Butler Lampson and Mart{\'\i}n Abadi and Michael
                 Burrows and Edward Wobber",
  title =        "Authentication in Distributed Systems: Theory and
                 Practice",
  journal =      j-TOCS,
  volume =       "10",
  number =       "4",
  pages =        "265--310",
  month =        nov,
  year =         "1992",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-4/p265-lampson/",
  abstract =     "We describe a theory of authentication and a system
                 that implements it. Our theory is based on the notion
                 of principal and a ``speaks for'' relation between
                 principals. A simple principal either has a name or is
                 a communication channel; a compound principal can
                 express an adopted role or delegated authority. The
                 theory shows how to reason about a principal's
                 authority by deducing the other principals that it can
                 speak for; authenticating a channel is one important
                 application. We use the theory to explain many existing
                 and proposed security mechanisms. In particular, we
                 describe the system we have built. It passes principals
                 efficiently as arguments or results of remote procedure
                 calls, and it handles public and shared key encryption,
                 name lookup in a large name space, groups of
                 principals, program loading, delegation, access
                 control, and revocation.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "security; theory; verification",
  subject =      "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and
                 Protection, Authentication. {\bf D.4.6} Software,
                 OPERATING SYSTEMS, Security and Protection, Access
                 controls. {\bf D.4.6} Software, OPERATING SYSTEMS,
                 Security and Protection, Cryptographic controls. {\bf
                 C.2.4} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.
                 {\bf E.3} Data, DATA ENCRYPTION. {\bf K.6.5} Computing
                 Milieux, MANAGEMENT OF COMPUTING AND INFORMATION
                 SYSTEMS, Security and Protection, Authentication.",
}
@Article{Anderson:1992:FSC,
  author =       "David P. Anderson and Yoshitomo Osawa and Ramesh
                 Govindan",
  title =        "A File System for Continuous Media",
  journal =      j-TOCS,
  volume =       "10",
  number =       "4",
  pages =        "311--337",
  month =        nov,
  year =         "1992",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-4/p311-anderson/",
  abstract =     "The Continuous Media File System, CMFS, supports
                 real-time storage and retrieval of continuous media
                 data (digital audio and video) on disk. CMFS clients
                 read or write files in ``sessions,'' each with a
                 guaranteed minimum data rate. Multiple sessions,
                 perhaps with different rates, and non-real-time access
                 can proceed concurrently. CMFS addresses several
                 interrelated design issues; real-time semantics for
                 sessions, disk layout, an acceptance test for new
                 sessions, and disk scheduling policy. We use simulation
                 to compare different design choices.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "algorithms; design; performance",
  subject =      "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
                 Management, File organization. {\bf D.4.3} Software,
                 OPERATING SYSTEMS, File Systems Management, Access
                 methods. {\bf D.4.7} Software, OPERATING SYSTEMS,
                 Organization and Design, Real-time systems and embedded
                 systems. {\bf H.5.1} Information Systems, INFORMATION
                 INTERFACES AND PRESENTATION, Multimedia Information
                 Systems.",
}
@Article{Kessler:1992:PPA,
  author =       "R. E. Kessler and Mark D. Hill",
  title =        "Page Placement Algorithms for Large Real-Indexed
                 Caches",
  journal =      j-TOCS,
  volume =       "10",
  number =       "4",
  pages =        "338--359",
  month =        nov,
  year =         "1992",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-4/p338-kessler/",
  abstract =     "When a computer system supports both paged virtual
                 memory and large real-indexed caches, cache performance
                 depends in part on the main memory page placement. To
                 date, most operating systems place pages by selecting
                 an arbitrary page frame from a pool of page frames that
                 have been made available by the page replacement
                 algorithm. We give a simple model that shows that this
                 naive (arbitrary) page placement leads to up to 30\%
                 unnecessary cache conflicts. We develop several page
                 placement algorithms, called {\em careful-mapping
                 algorithms}, that try to select a page frame (from the
                 pool of available page frames) that is likely to reduce
                 cache contention. Using trace-driven simulation, we
                 find that careful mapping results in 10--20\% fewer
                 (dynamic) cache misses than naive mapping (for a
                 direct-mapped real-indexed multimegabyte cache). Thus,
                 our results suggest that careful mapping by the
                 operating system can get about half the cache miss
                 reduction that a cache size (or associativity) doubling
                 can.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "algorithms; measurement; performance",
  subject =      "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
                 Management, Allocation/deallocation strategies. {\bf
                 B.3.2} Hardware, MEMORY STRUCTURES, Design Styles,
                 Cache memories. {\bf B.3.2} Hardware, MEMORY
                 STRUCTURES, Design Styles, Virtual memory. {\bf B.3.3}
                 Hardware, MEMORY STRUCTURES, Performance Analysis and
                 Design Aids**, Simulation**. {\bf C.4} Computer Systems
                 Organization, PERFORMANCE OF SYSTEMS. {\bf D.4.1}
                 Software, OPERATING SYSTEMS, Process Management. {\bf
                 E.2} Data, DATA STORAGE REPRESENTATIONS. {\bf D.4.2}
                 Software, OPERATING SYSTEMS, Storage Management, Main
                 memory.",
}
@Article{Ladin:1992:PHA,
  author =       "Rivka Ladin and Barbara Liskov and Liuba Shrira and
                 Sanjay Ghemawat",
  title =        "Providing High Availability Using Lazy Replication",
  journal =      j-TOCS,
  volume =       "10",
  number =       "4",
  pages =        "360--391",
  month =        nov,
  year =         "1992",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1992-10-4/p360-ladin/",
  abstract =     "To provide high availability for services such as mail
                 or bulletin boards, data must be replicated. One way to
                 guarantee consistency of replicated data is to force
                 service operations to occur in the same order at all
                 sites, but this approach is expensive. For some
                 applications a weaker causal operation order can
                 preserve consistency while providing better
                 performance. This paper describes a new way of
                 implementing causal operations. Our technique also
                 supports two other kinds of operations: operations that
                 are totally ordered with respect to one another and
                 operations that are totally ordered with respect to all
                 other operations. The method performs well in terms of
                 response time, operation-processing capacity, amount of
                 stored state, and number and size of messages; it does
                 better than replication methods based on reliable
                 multicast techniques.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "algorithms; performance; reliability",
  subject =      "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization
                 and Design, Distributed systems. {\bf C.2.4} Computer
                 Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
                 Distributed Systems, Distributed applications. {\bf
                 C.2.4} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
                 Distributed databases. {\bf C.4} Computer Systems
                 Organization, PERFORMANCE OF SYSTEMS, Reliability,
                 availability, and serviceability. {\bf D.4.5} Software,
                 OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf
                 H.2.2} Information Systems, DATABASE MANAGEMENT,
                 Physical Design, Recovery and restart. {\bf H.2.4}
                 Information Systems, DATABASE MANAGEMENT, Systems,
                 Concurrency. {\bf H.2.4} Information Systems, DATABASE
                 MANAGEMENT, Systems, Distributed databases.",
}
@Article{Eager:1993:CER,
  author =       "Derek L. Eager and John Zahorjan",
  title =        "Chores: Enhanced Run-Time Support for Shared-Memory
                 Parallel Computing",
  journal =      j-TOCS,
  volume =       "11",
  number =       "1",
  pages =        "1--32",
  month =        feb,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-1/p1-eager/",
  abstract =     "Parallel computing is increasingly important in the
                 solution of large-scale numerical problems. The
                 difficulty of efficiently hand-coding parallelism, and
                 the limitations of parallelizing compilers, have
                 nonetheless restricted its use by scientific
                 programmers. In this paper we propose a new paradigm,
                 {\em chores}, for the run-time support of parallel
                 computing on shared-memory multiprocessors. We consider
                 specifically uniform memory access shared-memory
                 environments, although the chore paradigm should also
                 be appropriate for use within the clusters of a
                 large-scale nonuniform memory access machine. We argue
                 that chore systems attain both the high efficiency of
                 compiler approaches for the common case of data
                 parallelism, and the flexibility and performance of
                 user-level thread approaches for functional
                 parallelism. These benefits are achieved within a
                 single, simple conceptual model that almost entirely
                 relieves the programmer and compiler from concerns of
                 granularity, scheduling, and enforcement of
                 synchronization constraints. Measurements of a
                 prototype implementation demonstrate that the chore
                 model can be supported more efficiently than can
                 traditional approaches to either data or functional
                 parallelism alone.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; measurement; performance",
  subject =      "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management. {\bf D.4.9} Software, OPERATING SYSTEMS,
                 Systems Programs and Utilities. {\bf D.4.7} Software,
                 OPERATING SYSTEMS, Organization and Design, Distributed
                 systems. {\bf C.3} Computer Systems Organization,
                 SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS. {\bf
                 C.4} Computer Systems Organization, PERFORMANCE OF
                 SYSTEMS.",
}
@Article{Gheith:1993:CKS,
  author =       "Ahmed Gheith and Karsten Schwan",
  title =        "{CHAOS$^{\rm arc}$}: Kernel Support for Multiweight
                 Objects, Invocations, and Atomicity in Real-Time
                 Multiprocessor Applications",
  journal =      j-TOCS,
  volume =       "11",
  number =       "1",
  pages =        "33--72",
  month =        feb,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-1/p33-gheith/",
  abstract =     "CHAOS$^{\rm arc}$ is an object-based multiprocessor operating
                 system kernel that provides primitives with which
                 programmers may easily construct objects of differing
                 types and object invocations of differing semantics,
                 targeting multiprocessor systems, and real-time
                 applications. The CHAOS$^{\rm arc}$ can {\em guarantee\/}
                 desired performance and functionality levels of
                 selected computations in real-time applications. Such
                 guarantees can be made despite possible uncertainty in
                 execution environments by allowing programs to {\em
                 adapt\/} in performance and functionality to varying
                 operating conditions. This paper reviews the primitives
                 offered by CHAOS$^{\rm arc}$ and demonstrates how the required
                 elements of the CHAOS$^{\rm arc}$ real-time kernel are
                 constructed with those primitives.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; experimentation; measurement; performance",
  subject =      "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization
                 and Design, Real-time systems and embedded systems.
                 {\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management. {\bf J.7} Computer Applications, COMPUTERS
                 IN OTHER SYSTEMS, Real time. {\bf D.3.3} Software,
                 PROGRAMMING LANGUAGES, Language Constructs and
                 Features, Concurrent programming structures. {\bf C.3}
                 Computer Systems Organization, SPECIAL-PURPOSE AND
                 APPLICATION-BASED SYSTEMS, Real-time and embedded
                 systems. {\bf D.4.8} Software, OPERATING SYSTEMS,
                 Performance, Measurements.",
}
@Article{Kaashoek:1993:FIP,
  author =       "M. Frans Kaashoek and Robbert van Renesse and Hans van
                 Staveren and Andrew S. Tanenbaum",
  title =        "{FLIP}: An Internetwork Protocol for Supporting
                 Distributed Systems",
  journal =      j-TOCS,
  volume =       "11",
  number =       "1",
  pages =        "73--106",
  month =        feb,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-1/p73-kaashoek/",
  abstract =     "Most modern network protocols give adequate support
                 for traditional applications such as file transfer and
                 remote login. Distributed applications, however, have
                 different requirements (e.g., efficient at-most-once
                 remote procedure call even in the face of processor
                 failures). Instead of using ad hoc protocols to meet
                 each of the new requirements, we have designed a new
                 protocol, called the Fast Local Internet Protocol
                 (FLIP), that provides a clean and simple integrated
                 approach to these new requirements. FLIP is an
                 unreliable message protocol that provides both
                 point-to-point communication and multicast
                 communication, and requires almost no network
                 management. Furthermore, by using FLIP we have
                 simplified higher-level protocols such as remote
                 procedure call and group communication, and enhanced
                 support for process migration and security. A prototype
                 implementation of FLIP has been built as part of the
                 new kernel for the Amoeba distributed operating system,
                 and is in daily use. Measurements of its performance
                 are presented.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; management; measurement; performance",
  subject =      "{\bf C.2.2} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Network Protocols.
                 {\bf C.2.4} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.
                 {\bf D.4.7} Software, OPERATING SYSTEMS, Organization
                 and Design, Distributed systems. {\bf D.4.0} Software,
                 OPERATING SYSTEMS, General, Amoeba. {\bf D.4.4}
                 Software, OPERATING SYSTEMS, Communications
                 Management.",
}
@Article{Gopalakrishnan:1993:DVR,
  author =       "Ganesh Gopalakrishnan and Richard Fujimoto",
  title =        "Design and Verification of the {Rollback Chip} using
                 {HOP}: A Case Study of Formal Methods Applied to
                 Hardware Design",
  journal =      j-TOCS,
  volume =       "11",
  number =       "2",
  pages =        "109--145",
  month =        may,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-2/p109-gopalakrishnan/",
  abstract =     "The use of formal methods in hardware design improves
                 the quality of designs in many ways: it promotes better
                 understanding of the design; it permits systematic
                 design refinement through the discovery of invariants;
                 and it allows design verification (informal or formal).
                 In this paper we illustrate the use of formal methods
                 in the design of a custom hardware system called the
                 ``Rollback Chip'' (RBC), conducted using a simple
                 hardware design description language called ``HOP''. An
                 informal specification of the requirements of the RBC
                 is first given, followed by a {\em behavioral
                 description\/} of the RBC stating its {\em desired
                 behavior}. The behavioral description is refined into
                 progressively more efficient designs, terminating in a
                 {\em structural description}. Key refinement steps are
                 based on system invariants that are discovered during
                 the design, and proved correct during design
                 verification. The first step in design verification is
                 to apply a program called PARCOMP to {\em derive\/} a
                 behavioral description from the structural description
                 of the RBC. The derived behavior is then compared
                 against the desired behavior using equational
                 verification techniques. This work demonstrates that
                 formal methods can be fruitfully applied to a
                 nontrivial hardware design. It also illustrates the
                 particular advantages of our approach based on HOP and
                 PARCOMP. Last, but not the least, it formally verifies
                 the RBC mechanism itself.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; languages; theory; verification",
  subject =      "{\bf B.7.2} Hardware, INTEGRATED CIRCUITS, Design
                 Aids, Verification. {\bf B.6.3} Hardware, LOGIC DESIGN,
                 Design Aids, Hardware description languages. {\bf
                 B.7.1} Hardware, INTEGRATED CIRCUITS, Types and Design
                 Styles. {\bf B.7.2} Hardware, INTEGRATED CIRCUITS,
                 Design Aids, Simulation.",
}
@Article{McCann:1993:DPA,
  author =       "Cathy McCann and Raj Vaswani and John Zahorjan",
  title =        "A Dynamic Processor Allocation Policy for
                 Multiprogrammed Shared-Memory Multiprocessors",
  journal =      j-TOCS,
  volume =       "11",
  number =       "2",
  pages =        "146--178",
  month =        may,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-2/p146-mccann/",
  abstract =     "We propose and evaluate empirically the performance of
                 a dynamic processor-scheduling policy for
                 multiprogrammed shared-memory multiprocessors. The
                 policy is dynamic in that it reallocates processors
                 from one parallel job to another based on the currently
                 realized parallelism of those jobs. The policy is
                 suitable for implementation in production systems in
                 that: ---It interacts well with very efficient
                 user-level thread packages, leaving to them many
                 low-level thread operations that do not require kernel
                 intervention. ---It deals with thread blocking due to
                 user I/O and page faults. ---It ensures fairness in
                 delivering resources to jobs. ---Its performance,
                 measured in terms of average job response time, is
                 superior to that of previously proposed schedulers,
                 including those implemented in existing systems. It
                 provides good performance to very short, sequential
                 (e.g., interactive) requests. We have evaluated our
                 scheduler and compared it to alternatives using a set
                 of prototype implementations running on a Sequent
                 Symmetry multiprocessor. Using a number of parallel
                 applications with distinct qualitative behaviors, we
                 have both evaluated the policies according to the major
                 criterion of overall performance and examined a number
                 of more general policy issues, including the advantage
                 of ``space sharing'' over ``time sharing'' the
                 processors of a multiprocessor, and the importance of
                 cooperation between the kernel and the application in
                 reallocating processors between jobs. We have also
                 compared the policies according to other criteria
                 important in real implementations, in particular,
                 fairness and response time to short, sequential
                 requests. We conclude that a combination of performance
                 and implementation considerations makes a compelling
                 case for our dynamic scheduling policy.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; measurement; performance",
  subject =      "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management, Scheduling. {\bf D.4.1} Software, OPERATING
                 SYSTEMS, Process Management,
                 Multiprocessing/multiprogramming/multitasking. {\bf
                 C.1.2} Computer Systems Organization, PROCESSOR
                 ARCHITECTURES, Multiple Data Stream Architectures
                 (Multiprocessors).",
}
@Article{Thekkath:1993:LLL,
  author =       "Chandramohan A. Thekkath and Henry M. Levy",
  title =        "Limits to Low-Latency Communication on High-Speed
                 Networks",
  journal =      j-TOCS,
  volume =       "11",
  number =       "2",
  pages =        "179--203",
  month =        may,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-2/p179-thekkath/",
  abstract =     "The throughput of local area networks is rapidly
                 increasing. For example, the bandwidth of new ATM
                 networks and FDDI token rings is an order of magnitude
                 greater than that of Ethernets. Other network
                 technologies promise a bandwidth increase of yet
                 another order of magnitude in several years. However,
                 in distributed systems, lowered latency rather than
                 increased throughput is often of primary concern. This
                 paper examines the system-level effects of newer
                 high-speed network technologies on low-latency,
                 cross-machine communications. To evaluate a number of
                 influences, both hardware and software, we designed and
                 implemented a new remote procedure call system targeted
                 at providing low latency. We then ported this system to
                 several hardware platforms (DECstation and
                 SPARCstation) with several different networks and
                 controllers (ATM, FDDI, and Ethernet). Comparing these
                 systems allows us to explore the performance impact of
                 alternative designs in the communication system with
                 respect to achieving low latency, e.g., the network,
                 the network controller, the host architecture and cache
                 system, and the kernel and user-level runtime software.
                 Our RPC system, which achieves substantially reduced
                 call times (170 $\mu$seconds on an ATM network using
                 DECstation 5000/200 hosts), allows us to isolate those
                 components of next-generation networks and controllers
                 that still stand in the way of low-latency
                 communication. We demonstrate that new-generation
                 processor technology and software design can reduce
                 small-packet RPC times to near network-imposed limits,
                 making network and controller design more crucial than
                 ever to achieving truly low-latency communication.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; measurement; performance",
  subject =      "{\bf D.4.4} Software, OPERATING SYSTEMS,
                 Communications Management, Network communication. {\bf
                 D.4.4} Software, OPERATING SYSTEMS, Communications
                 Management, Message sending. {\bf D.4.7} Software,
                 OPERATING SYSTEMS, Organization and Design, Distributed
                 systems. {\bf B.4.2} Hardware, INPUT/OUTPUT AND DATA
                 COMMUNICATIONS, Input/Output Devices, Channels and
                 controllers. {\bf C.2.2} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
                 Protocol architecture. {\bf C.2.1} Computer Systems
                 Organization, COMPUTER-COMMUNICATION NETWORKS, Network
                 Architecture and Design. {\bf C.2.4} Computer Systems
                 Organization, COMPUTER-COMMUNICATION NETWORKS,
                 Distributed Systems, Distributed applications. {\bf
                 C.2.4} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
                 Network operating systems.",
}
@Article{Ammann:1993:DTG,
  author =       "Paul Ammann and Sushil Jajodia",
  title =        "Distributed Timestamp Generation in Planar Lattice
                 Networks",
  journal =      j-TOCS,
  volume =       "11",
  number =       "3",
  pages =        "205--225",
  month =        aug,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-3/p205-ammann/",
  abstract =     "Timestamps are considered for distributed environments
                 in which information flow is restricted to one
                 direction through a planar lattice imposed on a
                 network. For applications in such networks, existing
                 timestamping algorithms require extension and
                 modification. For example, in secure environments,
                 typical timestamps provide a potential signaling
                 channel between incomparable levels. In hierarchical
                 databases, typical timestamps cause peripheral sites to
                 unnecessarily affect the behavior at main sites.
                 Algorithms are presented by which a network node may
                 generate and compare timestamps using timestamp
                 components maintained at dominated nodes in the
                 network. The comparison relation is shown to be acyclic
                 for timestamps produced by the generation algorithm. We
                 discuss ways to safely relax the requirement that the
                 network be a lattice. By example, we show how to modify
                 a simple nonplanar lattice so that the generation
                 algorithm can be applied. Uses of the timestamp
                 generation algorithm in the motivating applications are
                 outlined.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "algorithms; security",
  subject =      "{\bf C.2.4} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
                 Distributed applications. {\bf C.2.0} Computer Systems
                 Organization, COMPUTER-COMMUNICATION NETWORKS, General,
                 Security and protection (e.g., firewalls). {\bf G.2.m}
                 Mathematics of Computing, DISCRETE MATHEMATICS,
                 Miscellaneous. {\bf D.4.6} Software, OPERATING SYSTEMS,
                 Security and Protection, Information flow controls.
                 {\bf H.2.4} Information Systems, DATABASE MANAGEMENT,
                 Systems, Concurrency. {\bf H.2.4} Information Systems,
                 DATABASE MANAGEMENT, Systems, Distributed databases.",
}
@Article{Anderson:1993:MCM,
  author =       "David P. Anderson",
  title =        "Metascheduling for Continuous Media",
  journal =      j-TOCS,
  volume =       "11",
  number =       "3",
  pages =        "226--252",
  month =        aug,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-3/p226-anderson/",
  abstract =     "Next-generation distributed systems will support {\em
                 continuous media\/} (digital audio and video) in the
                 same framework as other data. Many applications that
                 use continuous media need guaranteed end-to-end
                 performance (bounds on throughput and delay). To
                 reliably support these requirements, system components
                 such as CPU schedulers, networks, and file systems must
                 offer performance guarantees. A {\em metascheduler\/}
                 coordinates these components, negotiating end-to-end
                 guarantees on behalf of clients. The {\em CM-resource
                 model}, described in this paper, provides a basis for
                 such a metascheduler. It defines a workload
                 parameterization, an abstract interface to resources,
                 and an algorithm for reserving multiple resources. The
                 model uses an economic approach to dividing end-to-end
                 delay, and it allows system components to ``work
                 ahead,'' improving the performance of nonreal-time
                 workload.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "algorithms; design; economics; performance",
  subject =      "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management, Scheduling. {\bf C.2.4} Computer Systems
                 Organization, COMPUTER-COMMUNICATION NETWORKS,
                 Distributed Systems, Distributed applications. {\bf
                 C.4} Computer Systems Organization, PERFORMANCE OF
                 SYSTEMS, Performance attributes. {\bf D.4.4} Software,
                 OPERATING SYSTEMS, Communications Management,
                 Buffering. {\bf D.4.4} Software, OPERATING SYSTEMS,
                 Communications Management, Network communication. {\bf
                 D.4.7} Software, OPERATING SYSTEMS, Organization and
                 Design, Real-time systems and embedded systems. {\bf
                 H.5.1} Information Systems, INFORMATION INTERFACES AND
                 PRESENTATION, Multimedia Information Systems, Audio
                 input/output. {\bf H.5.1} Information Systems,
                 INFORMATION INTERFACES AND PRESENTATION, Multimedia
                 Information Systems, Video (e.g., tape, disk, DVI).",
}
@Article{Lim:1993:WAS,
  author =       "Beng-Hong Lim and Anant Agarwal",
  title =        "Waiting Algorithms for Synchronization in Large-Scale
                 Multiprocessors",
  journal =      j-TOCS,
  volume =       "11",
  number =       "3",
  pages =        "253--294",
  month =        aug,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-3/p253-lim/",
  abstract =     "Through analysis and experiments, this paper
                 investigates two-phase waiting algorithms to minimize
                 the cost of waiting for synchronization in large-scale
                 multiprocessors. In a two-phase algorithm, a thread
                 first waits by polling a synchronization variable. If
                 the cost of polling reaches a limit {\em Lpoll\/} and
                 further waiting is necessary, the thread is blocked,
                 incurring an additional fixed cost, {\em B}. The choice
                 of {\em Lpoll\/} is a critical determinant of the
                 performance of two-phase algorithms. We focus on
                 methods for statically determining {\em Lpoll\/}
                 because the run-time overhead of dynamically
                 determining {\em Lpoll\/} can be comparable to the cost
                 of blocking in large-scale multiprocessor systems with
                 lightweight threads. Our experiments show that {\em
                 always-block\/} ({\em Lpoll\/} = 0) is a good waiting
                 algorithm with performance that is usually close to the
                 best of the algorithms compared. We show that even
                 better performance can be achieved with a static choice
                 of {\em Lpoll\/} based on knowledge of likely wait-time
                 distributions. Motivated by the observation that
                 different synchronization types exhibit different
                 wait-time distributions, we prove that a static choice
                 of {\em Lpoll\/} can yield close to optimal on-line
                 performance against an adversary that is restricted to
                 choosing wait times from a fixed family of probability
                 distributions. This result allows us to make an optimal
                 static choice of {\em Lpoll\/} based on synchronization
                 type. For exponentially distributed wait times, we
                 prove that setting {\em Lpoll\/} = ln(e-1){\em B\/}
                 results in a waiting cost that is no more than {\em
                 e/(e-1)\/} times the cost of an optimal off-line
                 algorithm. For uniformly distributed wait times, we
                 prove that setting {\em Lpoll\/} = 1/2(square root of
                 5 - 1){\em B\/} results in a waiting cost that is no
                 more than (square root of 5 + 1)/2 (the golden ratio)
                 times the cost of an optimal off-line algorithm.
                 Experimental measurements of several parallel
                 applications on the Alewife multiprocessor simulator
                 corroborate our theoretical findings.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "algorithms; experimentation; performance; theory",
  subject =      "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
                 Management, Synchronization. {\bf D.4.1} Software,
                 OPERATING SYSTEMS, Process Management, Mutual
                 exclusion. {\bf C.4} Computer Systems Organization,
                 PERFORMANCE OF SYSTEMS. {\bf C.1.2} Computer Systems
                 Organization, PROCESSOR ARCHITECTURES, Multiple Data
                 Stream Architectures (Multiprocessors), Parallel
                 processors**. {\bf D.4.8} Software, OPERATING SYSTEMS,
                 Performance, Measurements. {\bf D.4.8} Software,
                 OPERATING SYSTEMS, Performance, Stochastic analysis.",
}
@Article{Hill:1993:CSM,
  author =       "Mark D. Hill and James R. Larus and Steven K.
                 Reinhardt and David A. Wood",
  title =        "Cooperative Shared Memory: Software and Hardware for
                 Scalable Multiprocessors",
  journal =      j-TOCS,
  volume =       "11",
  number =       "4",
  pages =        "300--318",
  month =        nov,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-4/p300-hill/",
  abstract =     "We believe the paucity of massively parallel,
                 shared-memory machines follows from the lack of a
                 shared-memory programming performance model that can
                 inform programmers of the cost of operations (so they
                 can avoid expensive ones) and can tell hardware
                 designers which cases are common (so they can build
                 simple hardware to optimize them). Cooperative shared
                 memory, our approach to shared-memory design, addresses
                 this problem. Our initial implementation of cooperative
                 shared memory uses a simple programming model, called
                 Check-In/Check-Out (CICO), in conjunction with even
                 simpler hardware, called Dir1SW. In CICO, programs
                 bracket uses of shared data with a check\_out directive
                 marking the expected first use and a check\_in
                 directive terminating the expected use of the data. A
                 cooperative prefetch directive helps hide communication
                 latency. Dir1SW is a minimal directory protocol that
                 adds little complexity to message-passing hardware, but
                 efficiently supports programs written within the CICO
                 model.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; experimentation; measurement; performance",
  subject =      "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
                 Styles, Shared memory. {\bf B.3.3} Hardware, MEMORY
                 STRUCTURES, Performance Analysis and Design Aids**,
                 Simulation**. {\bf C.1.2} Computer Systems
                 Organization, PROCESSOR ARCHITECTURES, Multiple Data
                 Stream Architectures (Multiprocessors). {\bf C.1.2}
                 Computer Systems Organization, PROCESSOR ARCHITECTURES,
                 Multiple Data Stream Architectures (Multiprocessors),
                 Parallel processors**. {\bf C.4} Computer Systems
                 Organization, PERFORMANCE OF SYSTEMS, Design studies.
                 {\bf C.4} Computer Systems Organization, PERFORMANCE OF
                 SYSTEMS, Modeling techniques. {\bf D.1.3} Software,
                 PROGRAMMING TECHNIQUES, Concurrent Programming,
                 Parallel programming.",
}
@Article{Anderson:1993:HSS,
  author =       "Thomas E. Anderson and Susan S. Owicki and James B.
                 Saxe and Charles P. Thacker",
  title =        "High-speed Switch Scheduling for Local-Area Networks",
  journal =      j-TOCS,
  volume =       "11",
  number =       "4",
  pages =        "319--352",
  month =        nov,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-4/p319-anderson/",
  abstract =     "Current technology trends make it possible to build
                 communication networks that can support
                 high-performance distributed computing. This paper
                 describes issues in the design of a prototype switch
                 for an arbitrary topology point-to-point network with
                 link speeds of up to 1 Gbit/s. The switch deals in
                 fixed-length ATM-style cells, which it can process at a
                 rate of 37 million cells per second. It provides high
                 bandwidth and low latency for datagram traffic. In
                 addition, it supports real-time traffic by providing
                 bandwidth reservations with guaranteed latency bounds.
                 The key to the switch's operation is a technique called
                 {\em parallel iterative matching}, which can quickly
                 identify a set of conflict-free cells for transmission
                 in a time slot. Bandwidth reservations are accommodated
                 in the switch by building a fixed schedule for
                 transporting cells from reserved flows across the
                 switch; parallel iterative matching can fill unused
                 slots with datagram traffic. Finally, we note that
                 parallel iterative matching may not allocate bandwidth
                 fairly among flows of datagram traffic. We describe a
                 technique called {\em statistical matching}, which can
                 be used to ensure fairness at the switch and to support
                 applications with rapidly changing needs for guaranteed
                 bandwidth.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "algorithms; design; experimentation; performance",
  subject =      "{\bf C.2.1} Computer Systems Organization,
                 COMPUTER-COMMUNICATION NETWORKS, Network Architecture
                 and Design, Network communications. {\bf C.2.5}
                 Computer Systems Organization, COMPUTER-COMMUNICATION
                 NETWORKS, Local and Wide-Area Networks, Access schemes.
                 {\bf G.2.2} Mathematics of Computing, DISCRETE
                 MATHEMATICS, Graph Theory, Graph algorithms.",
}
@Article{Li:1993:ANL,
  author =       "Wei Li and Keshav Pingali",
  title =        "Access Normalization: Loop Restructuring for {NUMA}
                 Computers",
  journal =      j-TOCS,
  volume =       "11",
  number =       "4",
  pages =        "353--375",
  month =        nov,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-4/p353-li/",
  abstract =     "In scalable parallel machines, processors can make
                 local memory accesses much faster than they can make
                 remote memory accesses. Additionally, when a number of
                 remote accesses must be made, it is usually more
                 efficient to use block transfers of data rather than to
                 use many small messages. To run well on such machines,
                 software must exploit these features. We believe it is
                 too onerous for a programmer to do this by hand, so we
                 have been exploring the use of restructuring compiler
                 technology for this purpose. In this article, we start
                 with a language like HPF-Fortran with user-specified
                 data distribution and develop a systematic loop
                 transformation strategy called {\em access
                 normalization\/} that restructures loop nests to
                 exploit locality and block transfers. We demonstrate
                 the power of our techniques using routines from the
                 BLAS (Basic Linear Algebra Subprograms) library. An
                 important feature of our approach is that we model loop
                 transformation using {\em invertible\/} matrices and
                 integer lattice theory.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "algorithms; experimentation; languages; performance",
  subject =      "{\bf C.1.2} Computer Systems Organization, PROCESSOR
                 ARCHITECTURES, Multiple Data Stream Architectures
                 (Multiprocessors), Multiple-instruction-stream,
                 multiple-data-stream processors (MIMD). {\bf D.1.3}
                 Software, PROGRAMMING TECHNIQUES, Concurrent
                 Programming, Parallel programming. {\bf D.3.4}
                 Software, PROGRAMMING LANGUAGES, Processors, Compilers.
                 {\bf D.3.4} Software, PROGRAMMING LANGUAGES,
                 Processors, Optimization. {\bf D.3.4} Software,
                 PROGRAMMING LANGUAGES, Processors, Code generation.",
}
@Article{Mahlke:1993:SSM,
  author =       "Scott A. Mahlke and William Y. Chen and Roger A.
                 Bringmann and Richard E. Hank and Wen-Mei W. Hwu and B.
                 Ramakrishna Rau and Michael S. Schlansker",
  title =        "Sentinel Scheduling: a Model for Compiler-Controlled
                 Speculative Execution",
  journal =      j-TOCS,
  volume =       "11",
  number =       "4",
  pages =        "376--408",
  month =        nov,
  year =         "1993",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1993-11-4/p376-mahlke/",
  abstract =     "Speculative execution is an important source of
                 parallelism for VLIW and superscalar processors. A
                 serious challenge with compiler-controlled speculative
                 execution is to efficiently handle exceptions for
                 speculative instructions. In this article, a set of
                 architectural features and compile-time scheduling
                 support collectively referred to as {\em sentinel
                 scheduling\/} is introduced. Sentinel scheduling
                 provides an effective framework for both
                 compiler-controlled speculative execution and exception
                 handling. All program exceptions are accurately
                 detected and reported in a timely manner with sentinel
                 scheduling. Recovery from exceptions is also ensured
                 with the model. Experimental results show the
                 effectiveness of sentinel scheduling for exploiting
                 instruction-level parallelism and overhead associated
                 with exception handling.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; experimentation; performance",
  subject =      "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
                 Styles, Associative memories. {\bf C.0} Computer
                 Systems Organization, GENERAL, Hardware/software
                 interfaces. {\bf C.0} Computer Systems Organization,
                 GENERAL, Instruction set design. {\bf C.0} Computer
                 Systems Organization, GENERAL, System architectures.
                 {\bf C.1.1} Computer Systems Organization, PROCESSOR
                 ARCHITECTURES, Single Data Stream Architectures,
                 Pipeline processors**. {\bf D.2.5} Software, SOFTWARE
                 ENGINEERING, Testing and Debugging, Error handling and
                 recovery. {\bf D.3.4} Software, PROGRAMMING LANGUAGES,
                 Processors, Code generation. {\bf D.3.4} Software,
                 PROGRAMMING LANGUAGES, Processors, Compilers. {\bf
                 D.3.4} Software, PROGRAMMING LANGUAGES, Processors,
                 Optimization.",
}
@Article{Wobber:1994:ATO,
  author =       "Edward Wobber and Mart{\'\i}n Abadi and Michael
                 Burrows and Butler Lampson",
  title =        "Authentication in the {Taos} Operating System",
  journal =      j-TOCS,
  volume =       "12",
  number =       "1",
  pages =        "3--32",
  month =        feb,
  year =         "1994",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-1/p3-wobber/",
  abstract =     "We describe a design for security in a distributed
                 system and its implementation. In our design,
                 applications gain access to security services through a
                 narrow interface. This interface provides a notion of
                 identity that includes simple principals, groups,
                 roles, and delegations. A new operating system
                 component manages principals, credentials, and secure
                 channels. It checks credentials according to the formal
                 rules of a logic of authentication. Our implementation
                 is efficient enough to support a substantial user
                 community.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; security; theory",
  subject =      "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and
                 Protection, Authentication. {\bf C.2.4} Computer
                 Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
                 Distributed Systems. {\bf D.4.6} Software, OPERATING
                 SYSTEMS, Security and Protection, Access controls.",
}
@Article{Satyanarayanan:1994:LRV,
  author =       "M. Satyanarayanan and Henry H. Mashburn and Puneet
                 Kumar and David C. Steere and James J. Kistler",
  title =        "Lightweight Recoverable Virtual Memory",
  journal =      j-TOCS,
  volume =       "12",
  number =       "1",
  pages =        "33--57",
  month =        feb,
  year =         "1994",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-1/p33-satyanarayanan/",
  abstract =     "{\em Recoverable virtual memory\/} refers to regions
                 of a virtual address space on which transactional
                 guarantees are offered. This article describes RVM, an
                 efficient, portable, and easily used implementation of
                 recoverable virtual memory for Unix environments. A
                 unique characteristic of RVM is that it allows
                 independent control over the transactional properties
                 of atomicity, permanence, and serializability. This
                 leads to considerable flexibility in the use of RVM,
                 potentially enlarging the range of applications that
                 can benefit from transactions. It also simplifies the
                 layering of functionality such as nesting and
                 distribution. The article shows that RVM performs well
                 over its intended range of usage even though it does
                 not benefit from specialized operating system support.
                 It also demonstrates the importance of intra- and
                 inter-transaction optimizations.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; experimentation; measurement; performance;
                 reliability",
  subject =      "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
                 Management, Virtual memory. {\bf D.4.5} Software,
                 OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf
                 D.4.8} Software, OPERATING SYSTEMS, Performance,
                 Measurements. {\bf H.2.2} Information Systems, DATABASE
                 MANAGEMENT, Physical Design, Recovery and restart. {\bf
                 H.2.4} Information Systems, DATABASE MANAGEMENT,
                 Systems, Transaction processing.",
}
@Article{Heidemann:1994:FSD,
  author =       "John S. Heidemann and Gerald J. Popek",
  title =        "File-system Development with Stackable Layers",
  journal =      j-TOCS,
  volume =       "12",
  number =       "1",
  pages =        "58--89",
  month =        feb,
  year =         "1994",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-1/p58-heidemann/",
  abstract =     "Filing services have experienced a number of
                 innovations in recent years, but many of these
                 promising ideas have failed to enter into broad use.
                 One reason is that current filing environments present
                 several barriers to new development. For example, file
                 systems today typically stand alone instead of building
                 on the work of others, and support of new filing
                 services often requires changes that invalidate
                 existing work. Stackable file-system design addresses
                 these issues in several ways. Complex filing services
                 are constructed from layer ``building blocks,'' each of
                 which may be provided by independent parties. There are
                 no syntactic constraints to layer order, and layers can
                 occupy different address spaces, allowing very flexible
                 layer configuration. Independent layer evolution and
                 development are supported by an extensible interface
                 bounding each layer. This paper discusses stackable
                 layering in detail and presents design techniques it
                 enables. We describe an implementation providing these
                 facilities that exhibits very high performance. By
                 lowering barriers to new filing design, stackable
                 layering offers the potential of broad third-party
                 file-system development not feasible today.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "design; performance",
  subject =      "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
                 Management, Maintenance**. {\bf D.4.7} Software,
                 OPERATING SYSTEMS, Organization and Design,
                 Hierarchical design**. {\bf D.4.8} Software, OPERATING
                 SYSTEMS, Performance, Measurements.",
}
@Article{Attiya:1994:SCV,
  author =       "Hagit Attiya and Jennifer L. Welch",
  title =        "Sequential Consistency versus Linearizability",
  journal =      j-TOCS,
  volume =       "12",
  number =       "2",
  pages =        "91--122",
  month =        may,
  year =         "1994",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Jan 13 18:36:53 MST 1999",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-2/p91-attiya/",
  abstract =     "The power of two well-known consistency conditions for
                 shared-memory multiprocessors, {\em sequential
                 consistency\/} and {\em linearizability}, is compared.
                 The cost measure studied is the worst-case response
                 time in distributed implementations of virtual shared
                 memory supporting one of the two conditions. Three
                 types of shared-memory objects are considered:
                 read/write objects, FIFO queues, and stacks. If clocks
                 are only approximately synchronized (or do not exist),
                 then for all three object types it is shown that
                 linearizability is more expensive than sequential
                 consistency. We show that, for all three data types,
                 the worst-case response time is very sensitive to the
                 assumptions that are made about the timing information
                 available to the system. Under the strong assumption
                 that processes have perfectly synchronized clocks, it
                 is shown that sequential consistency and
                 linearizability are equally costly. We present upper
                 bounds for linearizability and matching lower bounds
                 for sequential consistency. The upper bounds are shown
                 by presenting algorithms that use atomic broadcast in a
                 modular fashion. The lower-bound proofs for the
                 approximate case use the technique of ``shifting,''
                 first introduced for studying the clock synchronization
                 problem.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "algorithms; design; performance",
  subject =      "{\bf D.1.3} Software, PROGRAMMING TECHNIQUES,
                 Concurrent Programming, Distributed programming. {\bf
                 D.3.3} Software, PROGRAMMING LANGUAGES, Language
                 Constructs and Features, Concurrent programming
                 structures. {\bf D.4.2} Software, OPERATING SYSTEMS,
                 Storage Management, Distributed memories. {\bf F.1.2}
                 Theory of Computation, COMPUTATION BY ABSTRACT DEVICES,
                 Modes of Computation, Parallelism and concurrency. {\bf
                 H.2.4} Information Systems, DATABASE MANAGEMENT,
                 Systems, Distributed databases. {\bf H.2.4} Information
                 Systems, DATABASE MANAGEMENT, Systems, Concurrency.",
}
@Article{Mann:1994:CDF,
author = "Timothy Mann and Andrew Birrell and Andy Hisgen and
Charles Jerian and Garret Swart",
title = "A Coherent Distributed File Cache with Directory
Write-Behind",
journal = j-TOCS,
volume = "12",
number = "2",
pages = "123--164",
month = may,
year = "1994",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-2/p123-mann/",
abstract = "Extensive caching is a key feature of the Echo
distributed file system. Echo client machines maintain
coherent caches of file and directory data and
properties, with write-behind (delayed write-back) of
{\em all\/} cached information. Echo specifies ordering
constraints on this write-behind, enabling applications
to store and maintain consistent data structures in the
file system even when crashes or network faults prevent
some writes from being completed. In this paper we
describe the Echo cache's coherence and ordering
semantics, show how they can improve the performance
and consistency of applications, and explain how they
are implemented. We also discuss the general problem of
reliably notifying applications and users when
write-behind is lost; we addressed this problem as part
of the Echo design, but did not find a fully
satisfactory solution.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; experimentation; measurement; performance;
reliability; security",
subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
Management, Distributed file systems.",
}
@Article{Uhlig:1994:DTS,
author = "Richard Uhlig and David Nagle and Tim Stanley and
Trevor Mudge and Stuart Sechrest and Richard Brown",
title = "Design Tradeoffs for Software-Managed {TLBs}",
journal = j-TOCS,
volume = "12",
number = "3",
pages = "175--205",
month = aug,
year = "1994",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-3/p175-uhlig/",
abstract = "An increasing number of architectures provide virtual
memory support through software-managed TLBs. However,
software management can impose considerable penalties
that are highly dependent on the operating system's
structure and its use of virtual memory. This work
explores software-managed TLB design tradeoffs and
their interaction with a range of monolithic and
microkernel operating systems. Through hardware
monitoring and simulation, we explore TLB performance
for benchmarks running on a MIPS R2000-based
workstation running Ultrix, OSF/1, and three versions
of Mach 3.0.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; experimentation; performance",
subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
Management, Virtual memory. {\bf C.4} Computer Systems
Organization, PERFORMANCE OF SYSTEMS, Measurement
techniques. {\bf B.3.2} Hardware, MEMORY STRUCTURES,
Design Styles, Associative memories. {\bf B.3.2}
Hardware, MEMORY STRUCTURES, Design Styles, Cache
memories. {\bf B.3.2} Hardware, MEMORY STRUCTURES,
Design Styles, Virtual memory. {\bf B.3.3} Hardware,
MEMORY STRUCTURES, Performance Analysis and Design
Aids**, Simulation**. {\bf D.4.8} Software, OPERATING
SYSTEMS, Performance, Measurements.",
}
@Article{Stodolsky:1994:PLD,
author = "Daniel Stodolsky and Mark Holland and William V.
{Courtright II} and Garth A. Gibson",
title = "Parity Logging Disk Arrays",
journal = j-TOCS,
volume = "12",
number = "3",
pages = "206--235",
month = aug,
year = "1994",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-3/p206-stodolsky/",
abstract = "Parity-encoded redundant disk arrays provide highly
reliable, cost-effective secondary storage with high
performance for reads and large writes. Their
performance on small writes, however, is much worse
than mirrored disks---the traditional, highly reliable,
but expensive organization for secondary storage.
Unfortunately, small writes are a substantial portion
of the I/O workload of many important, demanding
applications such as on-line transaction processing.
This paper presents {\em parity logging}, a novel
solution to the small-write problem for redundant disk
arrays. Parity logging applies journalling techniques
to reduce substantially the cost of small writes. We
provide detailed models of parity logging and competing
schemes---mirroring, floating storage, and RAID level
5---and verify these models by simulation. Parity
logging provides performance competitive with
mirroring, but with capacity overhead close to the
minimum offered by RAID level 5. Finally, parity
logging can exploit data caching more effectively than
all three alternative approaches.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; performance; reliability",
subject = "{\bf B.4.2} Hardware, INPUT/OUTPUT AND DATA
COMMUNICATIONS, Input/Output Devices, Channels and
controllers. {\bf B.4.5} Hardware, INPUT/OUTPUT AND
DATA COMMUNICATIONS, Reliability, Testing, and
Fault-Tolerance**, Redundant design**.",
}
@Article{Cao:1994:TPR,
author = "Pei Cao and Swee Boon Lim and Shivakumar Venkataraman
and John Wilkes",
title = "The {TickerTAIP} Parallel {RAID} Architecture",
journal = j-TOCS,
volume = "12",
number = "3",
pages = "236--269",
month = aug,
year = "1994",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-3/p236-cao/",
abstract = "Traditional disk arrays have a centralized
architecture, with a single controller through which
all requests flow. Such a controller is a single point
of failure, and its performance limits the maximum
number of disks to which the array can scale. We
describe TickerTAIP, a parallel architecture for disk
arrays that distributes the controller functions across
several loosely coupled processors. The result is
better scalability, fault tolerance, and flexibility.
This article presents the TickerTAIP architecture and
an evaluation of its behavior. We demonstrate the
feasibility by a working example, describe a family of
distributed algorithms for calculating RAID parity,
discuss techniques for establishing request atomicity,
sequencing, and recovery, and evaluate the performance
of the TickerTAIP design in both absolute terms and by
comparison to a centralized RAID implementation. We
also analyze the effects of including disk-level
request-scheduling algorithms inside the array. We
conclude that the TickerTAIP architectural approach is
feasible, useful, and effective.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; performance; reliability",
subject = "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization
and Design, Distributed systems. {\bf B.4.2} Hardware,
INPUT/OUTPUT AND DATA COMMUNICATIONS, Input/Output
Devices, Channels and controllers. {\bf D.1.3}
Software, PROGRAMMING TECHNIQUES, Concurrent
Programming, Parallel programming. {\bf D.4.2}
Software, OPERATING SYSTEMS, Storage Management,
Secondary storage. {\bf D.4.7} Software, OPERATING
SYSTEMS, Organization and Design, Distributed
systems.",
}
@Article{Chase:1994:SPS,
author = "Jeffrey S. Chase and Henry M. Levy and Michael J.
Feeley and Edward D. Lazowska",
title = "Sharing and Protection in a Single-Address-Space
Operating System",
journal = j-TOCS,
volume = "12",
number = "4",
pages = "271--307",
month = nov,
year = "1994",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-4/p271-chase/",
abstract = "This article explores memory sharing and protection
support in Opal, a single-address-space operating
system designed for wide-address (64-bit)
architectures. Opal threads execute within protection
domains in a single shared virtual address space.
Sharing is simplified, because addresses are context
independent. There is no loss of protection, because
addressability and access are independent; the right to
access a segment is determined by the protection domain
in which a thread executes. This model enables
beneficial code- and data-sharing patterns that are
currently prohibitive, due in part to the inherent
restrictions of multiple address spaces, and in part to
Unix programming style. We have designed and
implemented an Opal prototype using the Mach 3.0
microkernel as a base. Our implementation demonstrates
how a single-address-space structure can be supported
alongside of other environments on a modern microkernel
operating system, using modern wide-address
architectures. This article justifies the Opal model
and its goals for sharing and protection, presents the
system and its abstractions, describes the prototype
implementation, and reports experience with integrated
applications.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; experimentation; measurement; performance",
subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
Management. {\bf C.1.3} Computer Systems Organization,
PROCESSOR ARCHITECTURES, Other Architecture Styles,
Capability architectures**. {\bf D.3.3} Software,
PROGRAMMING LANGUAGES, Language Constructs and
Features, Modules, packages. {\bf D.4.4} Software,
OPERATING SYSTEMS, Communications Management. {\bf
D.4.6} Software, OPERATING SYSTEMS, Security and
Protection, Access controls. {\bf D.4.6} Software,
OPERATING SYSTEMS, Security and Protection, Information
flow controls. {\bf D.4.7} Software, OPERATING SYSTEMS,
Organization and Design. {\bf D.4.8} Software,
OPERATING SYSTEMS, Performance, Measurements. {\bf E.1}
Data, DATA STRUCTURES. {\bf E.2} Data, DATA STORAGE
REPRESENTATIONS.",
}
@Article{Chen:1994:NAP,
author = "Peter M. Chen and David A. Patterson",
title = "A New Approach to {I/O} Performance Evaluation:
Self-Scaling {I/O} Benchmarks, Predicted {I/O}
Performance",
journal = j-TOCS,
volume = "12",
number = "4",
pages = "308--339",
month = nov,
year = "1994",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-4/p308-chen/",
abstract = "Current I/O benchmarks suffer from several chronic
problems: they quickly become obsolete; they do not
stress the I/O system; and they do not help much in
understanding I/O system performance. We propose a new
approach to I/O performance analysis. First, we propose
a self-scaling benchmark that dynamically adjusts
aspects of its workload according to the performance
characteristic of the system being measured. By doing
so, the benchmark automatically scales across current
and future systems. The evaluation aids in
understanding system performance by reporting how
performance varies according to each of five workload
parameters. Second, we propose predicted performance, a
technique for using the results from the self-scaling
evaluation to estimate quickly the performance for
workloads that have not been measured. We show that
this technique yields reasonably accurate performance
estimates and argue that this method gives a far more
accurate comparative performance evaluation than
traditional single-point benchmarks. We apply our new
evaluation technique by measuring a SPARCstation 1+
with one SCSI disk, an HP 730 with one SCSI-II disk, a
DECstation 5000/200 running the Sprite LFS operating
system with a three-disk disk array, a Convex C240
minisupercomputer with a four-disk disk array, and a
Solbourne 5E/905 fileserver with a two-disk disk
array.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "performance",
subject = "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance,
Measurements. {\bf K.6.2} Computing Milieux, MANAGEMENT
OF COMPUTING AND INFORMATION SYSTEMS, Installation
Management, Benchmarks. {\bf C.4} Computer Systems
Organization, PERFORMANCE OF SYSTEMS. {\bf D.2.8}
Software, SOFTWARE ENGINEERING, Metrics, Performance
measures. {\bf D.4.4} Software, OPERATING SYSTEMS,
Communications Management, Input/output.",
}
@Article{Reiter:1994:SAF,
author = "Michael K. Reiter and Kenneth P. Birman and Robbert
van Renesse",
title = "A Security Architecture for Fault-Tolerant Systems",
journal = j-TOCS,
volume = "12",
number = "4",
pages = "340--371",
month = nov,
year = "1994",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1994-12-4/p340-reiter/",
abstract = "Process groups are a common abstraction for
fault-tolerant computing in distributed systems. We
present a security architecture that extends the
process group into a security abstraction. Integral
parts of this architecture are services that securely
and fault tolerantly support cryptographic key
distribution. Using replication only when necessary,
and introducing novel replication techniques when it
was necessary, we have constructed these services both
to be easily defensible against attack and to permit
key distribution despite the transient unavailability
of a substantial number of servers. We detail the
design and implementation of these services and the
secure process group abstraction they support. We also
give preliminary performance figures for some common
group operations.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "reliability; security",
subject = "{\bf C.2.0} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, General, Security and
protection (e.g., firewalls). {\bf C.2.4} Computer
Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems. {\bf D.4.5} Software, OPERATING
SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.6}
Software, OPERATING SYSTEMS, Security and Protection,
Authentication. {\bf D.4.6} Software, OPERATING
SYSTEMS, Security and Protection, Cryptographic
controls. {\bf K.6.5} Computing Milieux, MANAGEMENT OF
COMPUTING AND INFORMATION SYSTEMS, Security and
Protection, Authentication. {\bf E.3} Data, DATA
ENCRYPTION.",
}
@Article{Bates:1995:DHD,
author = "Peter C. Bates",
title = "Debugging Heterogeneous Distributed Systems Using
Event-Based Models of Behavior",
journal = j-TOCS,
volume = "13",
number = "1",
pages = "1--31",
month = feb,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-1/p1-bates/",
abstract = "We describe a high-level debugging approach,
Event-Based Behavioral Abstraction (EBBA), in which
debugging is treated as a process of creating models of
expected program behaviors and comparing these to the
actual behaviors exhibited by the program. The use of
EBBA techniques can enhance debugging-tool
transparency, reduce latency and uncertainty for
fundamental debugging activities, and accommodate
diverse, heterogeneous architectures. Using events and
behavior models as a basic mechanism provides a uniform
view of heterogeneous systems and enables analysis to
be performed in well-defined ways. Their use also
enables EBBA users to extend and reuse knowledge gained
in solving previous problems to new situations. We
describe our behavior-modeling algorithm that matches
actual behavior to models and automates many behavior
analysis steps. The algorithm matches behavior in as
many ways as possible and resolves these to return the
best match to the user. It deals readily with partial
behavior matches and incomplete information. In
particular, we describe a tool set we have built. The
tool set has been used to investigate the behavior of a
wide range of programs. The tools are modular and can
be distributed readily throughout a system.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; reliability",
subject = "{\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing
and Debugging, Debugging aids. {\bf C.2.3} Computer
Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
Network Operations, Network monitoring. {\bf C.2.4}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Distributed Systems, Distributed
applications. {\bf D.2.2} Software, SOFTWARE
ENGINEERING, Design Tools and Techniques, Programmer
workbench**. {\bf D.2.5} Software, SOFTWARE
ENGINEERING, Testing and Debugging, Monitors. {\bf
D.2.5} Software, SOFTWARE ENGINEERING, Testing and
Debugging, Tracing.",
}
@Article{Sugumar:1995:SAC,
author = "Rabin A. Sugumar and Santosh G. Abraham",
title = "Set-Associative Cache Simulation Using Generalized
Binomial Trees",
journal = j-TOCS,
volume = "13",
number = "1",
pages = "32--56",
month = feb,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-1/p32-sugumar/",
abstract = "Set-associative caches are widely used in CPU memory
hierarchies, I/O subsystems, and file systems to reduce
average access times. This article proposes an
efficient simulation technique for simulating a group
of set-associative caches in a single pass through the
address trace, where all caches have the same line size
but varying associativities and varying number of sets.
The article also introduces a generalization of the
ordinary binomial tree and presents a representation of
caches in this class using the Generalized Binomial
Tree (gbt). The tree representation permits efficient
search and update of the caches. Theoretically, the new
algorithm, GBF\_LS, based on the gbt structure, always
takes fewer comparisons than the two earlier algorithms
for the same class of caches: all-associativity and
generalized forest simulation. Experimentally, the new
algorithm shows performance gains in the range of 1.2
to 3.8 over the earlier algorithms on address traces of
the SPEC benchmarks. A related algorithm for simulating
multiple alternative direct-mapped caches with fixed
cache size, but varying line size, is also presented.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; measurement; performance",
subject = "{\bf B.3.3} Hardware, MEMORY STRUCTURES, Performance
Analysis and Design Aids**, Simulation**. {\bf E.1}
Data, DATA STRUCTURES, Trees. {\bf I.6.8} Computing
Methodologies, SIMULATION AND MODELING, Types of
Simulation. {\bf B.3.2} Hardware, MEMORY STRUCTURES,
Design Styles, Cache memories.",
}
@Article{Tullsen:1995:ECP,
author = "Dean M. Tullsen and Susan J. Eggers",
title = "Effective Cache Prefetching on Bus-Based
Multiprocessors",
journal = j-TOCS,
volume = "13",
number = "1",
pages = "57--88",
month = feb,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-1/p57-tullsen/",
abstract = "Compiler-directed cache prefetching has the potential
to hide much of the high memory latency seen by current
and future high-performance processors. However,
prefetching is not without costs, particularly on a
shared-memory multiprocessor. Prefetching can
negatively affect bus utilization, overall cache miss
rates, memory latencies and data sharing. We simulate
the effects of a compiler-directed prefetching
algorithm, running on a range of bus-based
multiprocessors. We show that, despite a high memory
latency, this architecture does not necessarily support
prefetching well, in some cases actually causing
performance degradations. We pinpoint several problems
with prefetching on a shared-memory architecture
(additional conflict misses, no reduction in the
data-sharing traffic and associated latencies, a
multiprocessor's greater sensitivity to memory
utilization and the sensitivity of the cache hit rate
to prefetch distance) and measure their effect on
performance. We then solve those problems through
architectural techniques and heuristics for prefetching
that could be easily incorporated into a compiler: (1)
victim caching, which eliminates most of the cache
conflict misses caused by prefetching in a
direct-mapped cache, (2) special prefetch algorithms
for shared data, which significantly improve the
ability of our basic prefetching algorithm to prefetch
individual misses, and (3) compiler-based shared-data
restructuring, which eliminates many of the
invalidation misses the basic prefetching algorithm
does not predict. The combined effect of these
improvements is to make prefetching effective over a
much wider range of memory architectures.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; experimentation; performance",
subject = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
Styles, Cache memories. {\bf B.3.2} Hardware, MEMORY
STRUCTURES, Design Styles, Shared memory. {\bf C.1.2}
Computer Systems Organization, PROCESSOR ARCHITECTURES,
Multiple Data Stream Architectures (Multiprocessors).",
}
@Article{Akyurek:1995:ABR,
author = "Sedat Aky{\"u}rek and Kenneth Salem",
title = "Adaptive Block Rearrangement",
journal = j-TOCS,
volume = "13",
number = "2",
pages = "89--121",
month = may,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-2/p89-akyurek/",
abstract = "An adaptive technique for reducing disk seek times is
described. The technique copies frequently referenced
blocks from their original locations to reserved space
near the middle of the disk. Reference frequencies need
not be known in advance. Instead, they are estimated by
monitoring the stream of arriving requests.
Trace-driven simulations show that seek times can be
cut substantially by copying only a small number of
blocks using this technique. The technique has been
implemented by modifying a UNIX device driver. No
modifications are required to the file system that uses
the driver.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; experimentation; performance",
subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
Management. {\bf D.4.8} Software, OPERATING SYSTEMS,
Performance, Measurements. {\bf H.3.2} Information
Systems, INFORMATION STORAGE AND RETRIEVAL, Information
Storage. {\bf D.4.8} Software, OPERATING SYSTEMS,
Performance, Modeling and prediction. {\bf D.4.8}
Software, OPERATING SYSTEMS, Performance, Simulation.
{\bf D.4.8} Software, OPERATING SYSTEMS, Performance.",
}
@Article{Hosseini-Khayat:1995:SEB,
author = "Saied Hosseini-Khayat and Andreas D. Bovopoulos",
title = "A Simple and Efficient Bus Management Scheme That
Supports Continuous Streams",
journal = j-TOCS,
volume = "13",
number = "2",
pages = "122--140",
month = may,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-2/p122-hosseini-khayat/",
abstract = "An efficient bandwidth management and access
arbitration scheme for an I/O bus in a multimedia
workstation is presented. It assumes that a multimedia
workstation consists of a number of processing modules
which are interconnected by a packet bus. The scheme is
efficient in the sense that it allows the bus to
support both continuous media transfers and regular
random transactions in such a way that continuous
streams can meet their real-time constraints
independently of random traffic, and random traffic is
not delayed significantly by continuous traffic except
when traffic load is very high. Implementation
guidelines are provided to show that the scheme is
practical. Finally, the performance of this scheme is
compared with alternative solutions through
simulation.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "experimentation; performance",
subject = "{\bf B.4.3} Hardware, INPUT/OUTPUT AND DATA
COMMUNICATIONS, Interconnections (Subsystems),
Topology. {\bf B.4.4} Hardware, INPUT/OUTPUT AND DATA
COMMUNICATIONS, Performance Analysis and Design Aids**,
Simulation**. {\bf H.5.1} Information Systems,
INFORMATION INTERFACES AND PRESENTATION, Multimedia
Information Systems. {\bf C.0} Computer Systems
Organization, GENERAL, System architectures.",
}
@Article{Singh:1995:IHB,
author = "Jaswinder Pal Singh and John L. Hennessy and Anoop
Gupta",
title = "Implications of Hierarchical {$N$}-Body Methods for
Multiprocessor Architectures",
journal = j-TOCS,
volume = "13",
number = "2",
pages = "141--202",
month = may,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-2/p141-singh/",
abstract = "To design effective large-scale multiprocessors,
designers need to understand the characteristics of the
applications that will use the machines. Application
characteristics of particular interest include the
amount of communication relative to computation, the
structure of the communication, and the local cache and
memory requirements, as well as how these
characteristics scale with larger problems and
machines. One important class of applications is based
on hierarchical N-body methods, which are used to solve
a wide range of scientific and engineering problems
efficiently. Important characteristics of these methods
include the nonuniform and dynamically changing nature
of the domains to which they are applied, and their use
of long-range, irregular communication. This article
examines the key architectural implications of
representative applications that use the two dominant
hierarchical N-body methods: the Barnes--Hut Method and
the Fast Multipole Method. We first show that
exploiting temporal locality on accesses to
communicated data is critical to obtaining good
performance on these applications and then argue that
coherent caches on shared-address-space machines
exploit this locality both automatically and very
effectively. Next, we examine the implications of
scaling the applications to run on larger machines. We
use scaling methods that reflect the concerns of the
application scientist and find that this leads to
different conclusions about how communication traffic
and local cache and memory usage scale than scaling
based only on data set size. In particular, we show
that under the most realistic form of scaling, both the
communication-to-computation ratio as well as the
working-set size (and hence the ideal cache size per
processor) grow slowly as larger problems are run on
larger machines. Finally, we examine the effects of
using the two dominant abstractions for interprocessor
communication: a shared address space and explicit
message passing between private address spaces. We show
that the lack of an efficiently supported shared
address space will substantially increase the
programming complexity and performance overheads for
these applications.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; experimentation; measurement;
performance",
subject = "{\bf C.1.2} Computer Systems Organization, PROCESSOR
ARCHITECTURES, Multiple Data Stream Architectures
(Multiprocessors). {\bf C.0} Computer Systems
Organization, GENERAL, System architectures. {\bf C.4}
Computer Systems Organization, PERFORMANCE OF SYSTEMS.
{\bf C.5.1} Computer Systems Organization, COMPUTER
SYSTEM IMPLEMENTATION, Large and Medium (``Mainframe'')
Computers.",
}
@Article{Carter:1995:TRC,
author = "John B. Carter and John K. Bennett and Willy
Zwaenepoel",
title = "Techniques for Reducing Consistency-Related
Communication in Distributed Shared-Memory Systems",
journal = j-TOCS,
volume = "13",
number = "3",
pages = "205--243",
month = aug,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-3/p205-carter/",
abstract = "Distributed shared memory (DSM) is an abstraction of
shared memory on a distributed-memory machine. Hardware
DSM systems support this abstraction at the
architecture level; software DSM systems support the
abstraction within the runtime system. One of the key
problems in building an efficient software DSM system
is to reduce the amount of communication needed to keep
the distributed memories consistent. In this article we
present four techniques for doing so: software release
consistency; multiple consistency protocols;
write-shared protocols; and an update-with-timeout
mechanism. These techniques have been implemented in
the Munin DSM system. We compare the performance of
seven Munin application programs: first to their
performance when implemented using message passing, and
then to their performance when running on a
conventional software DSM system that does not embody
the preceding techniques. On a 16-processor cluster of
workstations, Munin's performance is within 5\% of
message passing for four out of the seven applications.
For the other three, performance is within 29 to 33\%.
Detailed analysis of two of these three applications
indicates that the addition of a function-shipping
capability would bring their performance to within 7\%
of the message-passing performance. Compared to a
conventional DSM system, Munin achieves performance
improvements ranging from a few to several hundred
percent, depending on the application.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; performance",
subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
Management, Distributed memories. {\bf B.3.2} Hardware,
MEMORY STRUCTURES, Design Styles, Cache memories. {\bf
C.1.2} Computer Systems Organization, PROCESSOR
ARCHITECTURES, Multiple Data Stream Architectures
(Multiprocessors), Interconnection architectures. {\bf
D.4.4} Software, OPERATING SYSTEMS, Communications
Management, Network communication. {\bf D.4.7}
Software, OPERATING SYSTEMS, Organization and Design,
Distributed systems. {\bf D.4.8} Software, OPERATING
SYSTEMS, Performance, Measurements. {\bf B.3.2}
Hardware, MEMORY STRUCTURES, Design Styles, Shared
memory. {\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
Styles, Virtual memory. {\bf D.4.2} Software, OPERATING
SYSTEMS, Storage Management, Virtual memory.",
}
@Article{Diwan:1995:MSP,
author = "Amer Diwan and David Tarditi and Eliot Moss",
title = "Memory System Performance of Programs with Intensive
Heap Allocation",
journal = j-TOCS,
volume = "13",
number = "3",
pages = "244--273",
month = aug,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-3/p244-diwan/",
abstract = "Heap allocation with copying garbage collection is a
general storage management technique for programming
languages. It is believed to have poor memory system
performance. To investigate this, we conducted an
in-depth study of the memory system performance of heap
allocation for memory systems found on many machines.
We studied the performance of mostly functional
Standard ML programs which made heavy use of heap
allocation. We found that most machines support heap
allocation poorly. However, with the appropriate memory
system organization, heap allocation can have good
performance. The memory system property crucial for
achieving good performance was the ability to allocate
and initialize a new object into the cache without a
penalty. This can be achieved by having subblock
placement with a subblock size of one word with a
write-allocate policy, along with fast page-mode writes
or a write buffer. For caches with subblock placement,
the data cache overhead was under 9\% for a 64K or
larger data cache; without subblock placement the
overhead was often higher than 50\%.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "experimentation; languages; measurement; performance",
subject = "{\bf D.3.3} Software, PROGRAMMING LANGUAGES, Language
Constructs and Features, Dynamic storage management.
{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design Styles,
Associative memories. {\bf B.3.3} Hardware, MEMORY
STRUCTURES, Performance Analysis and Design Aids**,
Simulation**. {\bf D.1.1} Software, PROGRAMMING
TECHNIQUES, Applicative (Functional) Programming. {\bf
D.3.2} Software, PROGRAMMING LANGUAGES, Language
Classifications. {\bf B.3.2} Hardware, MEMORY
STRUCTURES, Design Styles, Cache memories. {\bf C.4}
Computer Systems Organization, PERFORMANCE OF
SYSTEMS.",
}
@Article{Hartman:1995:ZSN,
author = "John H. Hartman and John K. Ousterhout",
title = "The {Zebra} Striped Network File System",
journal = j-TOCS,
volume = "13",
number = "3",
pages = "274--310",
month = aug,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-3/p274-hartman/",
abstract = "Zebra is a network file system that increases
throughput by striping the file data across multiple
servers. Rather than striping each file separately,
Zebra forms all the new data from each client into a
single stream, which it then stripes using an approach
similar to a log-structured file system. This provides
high performance for writes of small files as well as
for reads and writes of large files. Zebra also writes
parity information in each stripe in the style of RAID
disk arrays; this increases storage costs slightly, but
allows the system to continue operation while a single
storage server is unavailable. A prototype
implementation of Zebra, built in the Sprite operating
system, provides 4--5 times the throughput of the
standard Sprite file system or NFS for large files and
a 15--300\% improvement for writing small files.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; measurement; performance; reliability",
subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
Management, File organization. {\bf D.4.2} Software,
OPERATING SYSTEMS, Storage Management,
Allocation/deallocation strategies. {\bf D.4.3}
Software, OPERATING SYSTEMS, File Systems Management,
Access methods. {\bf D.4.5} Software, OPERATING
SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.7}
Software, OPERATING SYSTEMS, Organization and Design,
Distributed systems. {\bf D.4.8} Software, OPERATING
SYSTEMS, Performance, Measurements. {\bf E.5} Data,
FILES, Organization/structure. {\bf D.4.2} Software,
OPERATING SYSTEMS, Storage Management, Secondary
storage. {\bf D.4.3} Software, OPERATING SYSTEMS, File
Systems Management, Distributed file systems.",
}
@Article{Amir:1995:TSR,
author = "Y. Amir and L. E. Moser and P. M. Melliar-Smith and D.
A. Agarwal and P. Ciarfella",
title = "The {Totem} Single-Ring Ordering and Membership
Protocol",
journal = j-TOCS,
volume = "13",
number = "4",
pages = "311--342",
month = nov,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-4/p311-amir/",
abstract = "Fault-tolerant distributed systems are becoming more
important, but in existing systems, maintaining the
consistency of replicated data is quite expensive. The
Totem single-ring protocol supports consistent
concurrent operations by placing a total order on
broadcast messages. This total order is derived from
the sequence number in a token that circulates around a
logical ring imposed on a set of processors in a
broadcast domain. The protocol handles reconfiguration
of the system when processors fail and restart or when
the network partitions and remerges. Extended virtual
synchrony ensures that processors deliver messages and
configuration changes to the application in a
consistent, systemwide total order. An effective flow
control mechanism enables the Totem single-ring
protocol to achieve message-ordering rates
significantly higher than the best prior total-ordering
protocols.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "performance; reliability",
subject = "{\bf C.2.2} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
Protocol architecture. {\bf C.2.1} Computer Systems
Organization, COMPUTER-COMMUNICATION NETWORKS, Network
Architecture and Design, Network communications. {\bf
C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
Network operating systems. {\bf C.2.5} Computer Systems
Organization, COMPUTER-COMMUNICATION NETWORKS, Local
and Wide-Area Networks, Token rings. {\bf D.4.4}
Software, OPERATING SYSTEMS, Communications Management,
Network communication. {\bf D.4.5} Software, OPERATING
SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.7}
Software, OPERATING SYSTEMS, Organization and Design,
Distributed systems.",
}
@Article{Herlihy:1995:SCC,
author = "Maurice Herlihy and Beng-Hong Lim and Nir Shavit",
title = "Scalable Concurrent Counting",
journal = j-TOCS,
volume = "13",
number = "4",
pages = "343--364",
month = nov,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-4/p343-herlihy/",
abstract = "The notion of counting is central to a number of basic
multiprocessor coordination problems, such as dynamic
load balancing, barrier synchronization, and concurrent
data structure design. We investigate the scalability
of a variety of counting techniques for large-scale
multiprocessors. We compare counting techniques based
on: (1) spin locks, (2) message passing, (3)
distributed queues, (4) software combining trees, and
(5) counting networks. Our comparison is based on a
series of simple benchmarks on a simulated 64-processor
Alewife machine, a distributed-memory multiprocessor
currently under development at MIT. Although locking
techniques are known to perform well on small-scale,
bus-based multiprocessors, serialization limits
performance, and contention can degrade performance.
Both counting networks and combining trees outperform
the other methods substantially by avoiding
serialization and alleviating contention, although
combining-tree throughput is more sensitive to
variations in load. A comparison of shared-memory and
message-passing implementations of counting networks
and combining trees shows that message-passing
implementations have substantially higher throughput.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; experimentation; performance",
subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management,
Multiprocessing/multiprogramming/multitasking. {\bf
C.1.2} Computer Systems Organization, PROCESSOR
ARCHITECTURES, Multiple Data Stream Architectures
(Multiprocessors), Multiple-instruction-stream,
multiple-data-stream processors (MIMD). {\bf D.4.1}
Software, OPERATING SYSTEMS, Process Management,
Concurrency. {\bf D.4.1} Software, OPERATING SYSTEMS,
Process Management, Scheduling. {\bf B.3.3} Hardware,
MEMORY STRUCTURES, Performance Analysis and Design
Aids**, Simulation**. {\bf E.1} Data, DATA STRUCTURES,
Lists, stacks, and queues. {\bf E.1} Data, DATA
STRUCTURES, Trees.",
}
@Article{Mandrioli:1995:GTC,
author = "Dino Mandrioli and Sandro Morasca and Angelo
Morzenti",
title = "Generating Test Cases for Real-Time Systems from Logic
Specifications",
journal = j-TOCS,
volume = "13",
number = "4",
pages = "365--398",
month = nov,
year = "1995",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1995-13-4/p365-mandrioli/",
abstract = "We address the problem of automated derivation of
functional test cases for real-time systems, by
introducing techniques for generating test cases from
formal specifications written in TRIO, a language that
extends classical temporal logic to deal explicitly
with time measures. We describe an interactive tool
that has been built to implement these techniques,
based on interpretation algorithms of the TRIO
language. Several heuristic criteria are suggested to
reduce drastically the size of the test cases that are
generated. Experience in the use of the tool on
real-life cases is reported.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; verification",
subject = "{\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing
and Debugging, Testing tools (e.g., data generators,
coverage testing). {\bf B.6.2} Hardware, LOGIC DESIGN,
Reliability and Testing**, Test generation**. {\bf
B.6.3} Hardware, LOGIC DESIGN, Design Aids,
Verification. {\bf C.3} Computer Systems Organization,
SPECIAL-PURPOSE AND APPLICATION-BASED SYSTEMS,
Real-time and embedded systems. {\bf D.2.1} Software,
SOFTWARE ENGINEERING, Requirements/Specifications,
Languages. {\bf D.2.1} Software, SOFTWARE ENGINEERING,
Requirements/Specifications, Tools. {\bf B.6.3}
Hardware, LOGIC DESIGN, Design Aids, Hardware
description languages.",
}
@Article{Chen:1996:MPP,
author = "J. Bradley Chen and Yasuhiro Endo and Kee Chan and
David Mazi{\`e}res and Antonio Dias and Margo Seltzer
and Michael D. Smith",
title = "The Measured Performance of Personal Computer
Operating Systems",
journal = j-TOCS,
volume = "14",
number = "1",
pages = "3--40",
month = feb,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-1/p3-chen/",
abstract = "This article presents a comparative study of the
performance of three operating systems that run on the
personal computer architecture derived from the IBM-PC.
The operating systems, Windows for Workgroups, Windows
NT, and NetBSD (a freely available variant of the UNIX
operating system), cover a broad range of system
functionality and user requirements, from a
single-address-space model to full protection with
preemptive multitasking. Our measurements are enabled by
hardware counters in Intel's Pentium processor that
permit measurement of a broad range of processor events
including instruction counts and on-chip cache miss
counts. We use both microbenchmarks, which expose
specific differences between the systems, and
application workloads, which provide an indication of
expected end-to-end performance. Our microbenchmark
results show that accessing system functionality is
often more expensive in Windows for Workgroups than in
the other two systems due to frequent changes in
machine mode and the use of system call hooks. When
running native applications, Windows NT is more
efficient than Windows, but it incurs overhead similar
to that of a microkernel, since its application
interface (the Win32 API) is implemented as a
user-level server. Overall, system functionality can be
accessed most efficiently in NetBSD; we attribute this
to its monolithic structure and to the absence of the
complications created by hardware
backward-compatibility requirements in the other
systems. Measurements of application performance show
that although the impact of these differences is
significant in terms of instruction counts and other
hardware events (often a factor of 2 to 7 difference
between the systems), overall performance is sometimes
determined by the functionality provided by specific
subsystems, such as the graphics subsystem or the file
system buffer cache.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "measurement; performance",
subject = "{\bf D.4.8} Software, OPERATING SYSTEMS, Performance.
{\bf C.4} Computer Systems Organization, PERFORMANCE OF
SYSTEMS. {\bf D.4.0} Software, OPERATING SYSTEMS,
General. {\bf D.4.7} Software, OPERATING SYSTEMS,
Organization and Design.",
}
@Article{Anderson:1996:SNF,
author = "Thomas E. Anderson and Michael D. Dahlin and Jeanna M.
Neefe and David A. Patterson and Drew S. Roselli and
Randolph Y. Wang",
title = "Serverless Network File Systems",
journal = j-TOCS,
volume = "14",
number = "1",
pages = "41--79",
month = feb,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-1/p41-anderson/",
abstract = "We propose a new paradigm for network file system
design: {\em serverless network file systems}. While
traditional network file systems rely on a central
server machine, a serverless system utilizes
workstations cooperating as peers to provide all file
system services. Any machine in the system can store,
cache, or control any block of data. Our approach uses
this location independence, in combination with fast
local area networks, to provide better performance and
scalability than traditional file systems. Furthermore,
because any machine in the system can assume the
responsibilities of a failed component, our serverless
design also provides high availability via redundant
data storage. To demonstrate our approach, we have
implemented a prototype serverless network file system
called xFS. Preliminary performance measurements
suggest that our architecture achieves its goal of
scalability. For instance, in a 32-node xFS system with
32 active clients, each client receives nearly as much
read or write throughput as it would see if it were the
only active client.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; measurement; performance;
reliability",
subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
Management, Access methods. {\bf D.4.2} Software,
OPERATING SYSTEMS, Storage Management,
Allocation/deallocation strategies. {\bf D.4.5}
Software, OPERATING SYSTEMS, Reliability,
Checkpoint/restart. {\bf D.4.8} Software, OPERATING
SYSTEMS, Performance, Measurements. {\bf E.5} Data,
FILES, Organization/structure. {\bf H.3.2} Information
Systems, INFORMATION STORAGE AND RETRIEVAL, Information
Storage, File organization. {\bf D.4.2} Software,
OPERATING SYSTEMS, Storage Management, Secondary
storage. {\bf D.4.3} Software, OPERATING SYSTEMS, File
Systems Management, Directory structures. {\bf D.4.3}
Software, OPERATING SYSTEMS, File Systems Management,
Distributed file systems. {\bf D.4.3} Software,
OPERATING SYSTEMS, File Systems Management, File
organization. {\bf D.4.5} Software, OPERATING SYSTEMS,
Reliability, Fault-tolerance. {\bf D.4.8} Software,
OPERATING SYSTEMS, Performance, Simulation. {\bf C.2.4}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Distributed Systems, Network operating
systems.",
}
@Article{Bressoud:1996:HBF,
author = "Thomas C. Bressoud and Fred B. Schneider",
title = "Hypervisor-Based Fault Tolerance",
journal = j-TOCS,
volume = "14",
number = "1",
pages = "80--107",
month = feb,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-1/p80-bressoud/",
abstract = "Protocols to implement a fault-tolerant computing
system are described. These protocols augment the
hypervisor of a virtual-machine manager and coordinate
a primary virtual machine with its backup. No
modifications to the hardware, operating system, or
application programs are required. A prototype system
was constructed for HP's PA-RISC instruction-set
architecture. Even though the prototype was not
carefully tuned, it ran programs about a factor of 2
slower than a bare machine would.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; reliability",
subject = "{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability,
Fault-tolerance. {\bf C.2.4} Computer Systems
Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems, Network operating systems. {\bf
D.4.5} Software, OPERATING SYSTEMS, Reliability,
Checkpoint/restart.",
}
@Article{Wilkes:1996:HAH,
author = "John Wilkes and Richard Golding and Carl Staelin and
Tim Sullivan",
title = "The {HP AutoRAID} Hierarchical Storage System",
journal = j-TOCS,
volume = "14",
number = "1",
pages = "108--136",
month = feb,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-1/p108-wilkes/",
abstract = "Configuring redundant disk arrays is a black art. To
configure an array properly, a system administrator
must understand the details of both the array and the
workload it will support. Incorrect understanding of
either, or changes in the workload over time, can lead
to poor performance. We present a solution to this
problem: a two-level storage hierarchy implemented
inside a single disk-array controller. In the upper
level of this hierarchy, two copies of active data are
stored to provide full redundancy and excellent
performance. In the lower level, RAID 5 parity
protection is used to provide excellent storage cost
for inactive data, at somewhat lower performance. The
technology we describe in this article, known as HP
AutoRAID, automatically and transparently manages
migration of data blocks between these two levels as
access patterns change. The result is a fully redundant
storage system that is extremely easy to use, is
suitable for a wide variety of workloads, is largely
insensitive to dynamic workload changes, and performs
much better than disk arrays with comparable numbers of
spindles and much larger amounts of front-end RAM
cache. Because the implementation of the HP AutoRAID
technology is almost entirely in software, the
additional hardware cost for these benefits is very
small. We describe the HP AutoRAID technology in
detail, provide performance data for an embodiment of
it in a storage array, and summarize the results of
simulation studies used to choose algorithms
implemented in the array.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; performance; reliability",
subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
Management, Secondary storage. {\bf B.4.2} Hardware,
INPUT/OUTPUT AND DATA COMMUNICATIONS, Input/Output
Devices, Channels and controllers. {\bf B.4.5}
Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS,
Reliability, Testing, and Fault-Tolerance**, Redundant
design**. {\bf B.3.2} Hardware, MEMORY STRUCTURES,
Design Styles, Mass storage.",
}
@Article{Grimshaw:1996:PRT,
author = "Andrew S. Grimshaw and Jon B. Weissman and W. Timothy
Strayer",
title = "Portable Run-Time Support for Dynamic Object-Oriented
Parallel Processing",
journal = j-TOCS,
volume = "14",
number = "2",
pages = "139--170",
month = may,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-2/p139-grimshaw/",
abstract = "Mentat is an object-oriented parallel processing
system designed to simplify the task of writing
portable parallel programs for parallel machines and
workstation networks. The Mentat compiler and run-time
system work together to automatically manage the
communication and synchronization between objects. The
run-time system marshals member function arguments,
schedules objects on processors, and dynamically
constructs and executes large-grain data dependence
graphs. In this article we present the Mentat run-time
system. We focus on three aspects---the software
architecture, including the interface to the compiler
and the structure and interaction of the principal
components of the run-time system; the run-time
overhead on a component-by-component basis for two
platforms, a Sun SPARCstation 2 and an Intel Paragon;
and an analysis of the minimum granularity required for
application programs to overcome the run-time
overhead.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "languages; performance",
subject = "{\bf D.3.4} Software, PROGRAMMING LANGUAGES,
Processors, Run-time environments. {\bf D.1.3}
Software, PROGRAMMING TECHNIQUES, Concurrent
Programming, Parallel programming. {\bf D.1.5}
Software, PROGRAMMING TECHNIQUES, Object-oriented
Programming. {\bf D.3.2} Software, PROGRAMMING
LANGUAGES, Language Classifications, Concurrent,
distributed, and parallel languages. {\bf D.3.2}
Software, PROGRAMMING LANGUAGES, Language
Classifications, Object-oriented languages.",
}
@Article{Hardy:1996:CIE,
author = "Darren R. Hardy and Michael F. Schwartz",
title = "Customized Information Extraction as a Basis for
Resource Discovery",
journal = j-TOCS,
volume = "14",
number = "2",
pages = "171--199",
month = may,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-2/p171-hardy/",
abstract = "Indexing file contents is a powerful means of helping
users locate documents, software, and other types of
data among large repositories. In environments that
contain many different types of data, content indexing
requires type-specific processing to extract
information effectively. We present a model for
type-specific, user-customizable information
extraction, and a system implementation called {\em
Essence}. This software structure allows users to
associate specialized extraction methods with ordinary
files, providing the illusion of an object-oriented
file system that encapsulates indexing methods within
files. By exploiting the semantics of common file
types, Essence generates compact yet representative
file summaries that can be used to improve both
browsing and indexing in resource discovery systems.
Essence can extract information from most of the types
of files found in common file systems, including files
with nested structure (such as compressed ``tar''
files). Essence interoperates with a number of
different search/index systems (such as WAIS and
Glimpse), as part of the Harvest system.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; experimentation; measurement",
subject = "{\bf H.3.1} Information Systems, INFORMATION STORAGE
AND RETRIEVAL, Content Analysis and Indexing. {\bf
H.3.4} Information Systems, INFORMATION STORAGE AND
RETRIEVAL, Systems and Software, Information networks.
{\bf E.5} Data, FILES, Organization/structure. {\bf
H.5.2} Information Systems, INFORMATION INTERFACES AND
PRESENTATION, User Interfaces.",
}
@Article{Spasojevic:1996:ESW,
author = "Mirjana Spasojevic and M. Satyanarayanan",
title = "An Empirical Study of a Wide-Area Distributed File
System",
journal = j-TOCS,
volume = "14",
number = "2",
pages = "200--222",
month = may,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-2/p200-spasojevic/",
abstract = "The evolution of the Andrew File System (AFS) into a
wide-area distributed file system has encouraged
collaboration and information dissemination on a much
broader scale than ever before. We examine AFS as a
provider of wide-area file services to over 100
organizations around the world. We discuss usage
characteristics of AFS derived from empirical
measurements of the system. Our observations indicate
that AFS provides robust and efficient data access in
its current configuration, thus confirming its
viability as a design point for wide-area distributed
file systems.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; experimentation; measurement; performance",
subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
Management, Distributed file systems. {\bf D.4.8}
Software, OPERATING SYSTEMS, Performance,
Measurements.",
}
@Article{Corbett:1996:VPF,
author = "Peter F. Corbett and Dror G. Feitelson",
title = "The {Vesta} Parallel File System",
journal = j-TOCS,
volume = "14",
number = "3",
pages = "225--264",
month = aug,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-3/p225-corbett/",
abstract = "The Vesta parallel file system is designed to provide
parallel file access to application programs running on
multicomputers with parallel I/O subsystems. Vesta uses
a new abstraction of files: a file is not a sequence of
bytes, but rather it can be partitioned into multiple
disjoint sequences that are accessed in parallel. The
partitioning---which can also be changed
dynamically---reduces the need for synchronization and
coordination during the access. Some control over the
layout of data is also provided, so the layout can be
matched with the anticipated access patterns. The
system is fully implemented and forms the basis for the
AIX Parallel I/O File System on the IBM SP2. The
implementation does not compromise scalability or
parallelism. In fact, all data accesses are done
directly to the I/O node that contains the requested
data, without any indirection or access to shared
metadata. Disk mapping and caching functions are
confined to each I/O node, so there is no need to keep
data coherent across nodes. Performance measurements
show good scalability with increased resources.
Moreover, different access patterns are shown to achieve
similar performance.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; experimentation; performance",
subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
Management, Distributed file systems. {\bf D.4.4}
Software, OPERATING SYSTEMS, Communications Management,
Input/output. {\bf E.5} Data, FILES,
Organization/structure. {\bf D.1.3} Software,
PROGRAMMING TECHNIQUES, Concurrent Programming,
Parallel programming. {\bf D.4.1} Software, OPERATING
SYSTEMS, Process Management, Concurrency. {\bf C.1.2}
Computer Systems Organization, PROCESSOR ARCHITECTURES,
Multiple Data Stream Architectures (Multiprocessors),
Parallel processors**.",
}
@Article{Cristian:1996:FTA,
author = "Flaviu Cristian and Bob Dancey and Jon Dehn",
title = "Fault-tolerance in Air Traffic Control Systems",
journal = j-TOCS,
volume = "14",
number = "3",
pages = "265--286",
month = aug,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-3/p265-cristian/",
abstract = "The distributed real-time system services developed by
Lockheed Martin's Air Traffic Management group serve as
the infrastructure for a number of air traffic control
systems. Either completed development or under
development are the US Federal Aviation
Administration's Display System Replacement (DSR)
system, the UK Civil Aviation Authority's New Enroute
Center (NERC) system, and the Republic of China's Air
Traffic Control Automated System (ATCAS). These systems
are intended to replace present en route systems over
the next decade. High availability of air traffic
control services is an essential requirement of these
systems. This article discusses the general approach to
fault-tolerance adopted in this infrastructure, by
reviewing some of the questions which were asked during
the system design, various alternative solutions
considered, and the reasons for the design choices
made. The aspects of this infrastructure chosen for the
individual ATC systems mentioned above, along with the
status of those systems, are presented in Section
11 of the article.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; reliability",
subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE
OF SYSTEMS, Reliability, availability, and
serviceability. {\bf D.2.5} Software, SOFTWARE
ENGINEERING, Testing and Debugging, Error handling and
recovery. {\bf D.4.5} Software, OPERATING SYSTEMS,
Reliability, Fault-tolerance. {\bf J.7} Computer
Applications, COMPUTERS IN OTHER SYSTEMS, Real time.
{\bf D.4.7} Software, OPERATING SYSTEMS, Organization
and Design, Real-time systems and embedded systems.",
}
@Article{Devarakonda:1996:RCF,
author = "Murthy Devarakonda and Bill Kish and Ajay Mohindra",
title = "Recovery in the {Calypso} File System",
journal = j-TOCS,
volume = "14",
number = "3",
pages = "287--310",
month = aug,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-3/p287-devarakonda/",
abstract = "This article presents the design and implementation of
the recovery scheme in Calypso. Calypso is a
cluster-optimized, distributed file system for UNIX
clusters. As in Sprite and AFS, Calypso servers are
stateful and scale well to a large number of clients.
The recovery scheme in Calypso is nondisruptive,
meaning that open files remain open, client modified
data are saved, and in-flight operations are properly
handled across server recovery. The scheme uses
distributed state among the clients to reconstruct the
server state on a backup node if disks are multiported
or on the rebooted server node. It guarantees data
consistency during recovery and provides congestion
control. Measurements show that the state
reconstruction can be quite fast: for example, in a
32-node cluster, when an average node contains state
for about 420 files, the reconstruction time is about
3.3 seconds. However, the time to update a file system
after a failure can be a major factor in the overall
recovery time, even when using journaling techniques.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; management; measurement; reliability",
subject = "{\bf D.4.5} Software, OPERATING SYSTEMS, Reliability,
Fault-tolerance. {\bf C.4} Computer Systems
Organization, PERFORMANCE OF SYSTEMS. {\bf D.4.3}
Software, OPERATING SYSTEMS, File Systems Management,
Distributed file systems. {\bf D.4.7} Software,
OPERATING SYSTEMS, Organization and Design, Distributed
systems. {\bf E.5} Data, FILES, Backup/recovery.",
}
@Article{Cao:1996:IPI,
author = "Pei Cao and Edward W. Felten and Anna R. Karlin and
Kai Li",
title = "Implementation and Performance of Integrated
Application-Controlled File Caching, Prefetching, and
Disk Scheduling",
journal = j-TOCS,
volume = "14",
number = "4",
pages = "311--343",
month = nov,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-4/p311-cao/",
abstract = "As the performance gap between disks and
microprocessors continues to increase, effective
utilization of the file cache becomes increasingly
important. Application-controlled file caching and
prefetching can apply application-specific knowledge to
improve file cache management. However, supporting
application-controlled file caching and prefetching is
nontrivial because caching and prefetching need to be
integrated carefully, and the kernel needs to allocate
cache blocks among processes appropriately. This
article presents the design, implementation, and
performance of a file system that integrates
application-controlled caching, prefetching, and disk
scheduling. We use a two-level cache management
strategy. The kernel uses the LRU-SP
(Least-Recently-Used with Swapping and Placeholders)
policy to allocate blocks to processes, and each
process integrates application-specific caching and
prefetching based on the {\em controlled-aggressive\/}
policy, an algorithm previously shown in a theoretical
sense to be nearly optimal. Each process also improves
its disk access latency by submitting its prefetches in
batches so that the requests can be scheduled to
optimize disk access performance. Our measurements show
that this combination of techniques greatly improves
the performance of the file system. We measured that
the running time is reduced by 3\% to 49\% (average
26\%) for single-process workloads and by 5\% to 76\%
(average 32\%) for multiprocess workloads.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; experimentation; measurement;
performance",
subject = "{\bf D.4.2} Software, OPERATING SYSTEMS, Storage
Management, Secondary storage. {\bf C.4} Computer
Systems Organization, PERFORMANCE OF SYSTEMS, Design
studies. {\bf D.4.2} Software, OPERATING SYSTEMS,
Storage Management, Storage hierarchies. {\bf D.4.3}
Software, OPERATING SYSTEMS, File Systems Management,
Access methods. {\bf D.4.8} Software, OPERATING
SYSTEMS, Performance, Measurements. {\bf D.4.8}
Software, OPERATING SYSTEMS, Performance, Modeling and
prediction. {\bf E.5} Data, FILES, Optimization**.",
}
@Article{Saavedra:1996:ABC,
author = "Rafael H. Saavedra and Alan J. Smith",
title = "Analysis of Benchmark Characteristics and Benchmark
Performance Prediction",
journal = j-TOCS,
volume = "14",
number = "4",
pages = "344--384",
month = nov,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-4/p344-saavedra/",
abstract = "Standard benchmarking provides the run-times for given
programs on given machines, but fails to provide
insight as to why those results were obtained (either
in terms of machine or program characteristics) and
fails to provide run-times for that program on some
other machine, or some other programs on that machine.
We have developed a machine-independent model of
program execution to characterize both machine
performance and program execution. By merging these
machine and program characterizations, we can estimate
execution time for arbitrary machine/program
combinations. Our technique allows us to identify those
operations, either on the machine or in the programs,
which dominate the benchmark results. This information
helps designers in improving the performance of future
machines and users in tuning their applications to
better utilize the performance of existing machines.
Here we apply our methodology to characterize
benchmarks and predict their execution times. We
present extensive run-time statistics for a large set
of benchmarks including the SPEC and Perfect Club
suites. We show how these statistics can be used to
identify important shortcomings in the programs. In
addition, we give execution time estimates for a large
sample of programs and machines and compare these
against benchmark results. Finally, we develop a metric
for program similarity that makes it possible to
classify benchmarks with respect to a large set of
characteristics.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "measurement; performance",
subject = "{\bf C.4} Computer Systems Organization, PERFORMANCE
OF SYSTEMS, Measurement techniques. {\bf C.4} Computer
Systems Organization, PERFORMANCE OF SYSTEMS, Modeling
techniques. {\bf C.4} Computer Systems Organization,
PERFORMANCE OF SYSTEMS, Performance attributes. {\bf
D.2.8} Software, SOFTWARE ENGINEERING, Metrics,
Performance measures. {\bf I.6.4} Computing
Methodologies, SIMULATION AND MODELING, Model
Validation and Analysis.",
}
@Article{Shavit:1996:DT,
author = "Nir Shavit and Asaph Zemach",
title = "Diffracting Trees",
journal = j-TOCS,
volume = "14",
number = "4",
pages = "385--428",
month = nov,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-4/p385-shavit/",
abstract = "Shared counters are among the most basic coordination
structures in multiprocessor computation, with
applications ranging from barrier synchronization to
concurrent-data-structure design. This article
introduces diffracting trees, novel data structures for
shared counting and load balancing in a
distributed/parallel environment. Empirical evidence,
collected on a simulated distributed shared-memory
machine and several simulated message-passing
architectures, shows that diffracting trees scale
better and are more robust than both combining trees
and counting networks, currently the most effective
known methods for implementing concurrent counters in
software. The use of a randomized coordination method
together with a combinatorial data structure overcomes
the resiliency drawbacks of combining trees. Our
simulations show that to handle the same load,
diffracting trees and counting networks should have a
similar width {\em w}, yet the depth of a diffracting
tree is {\em O\/}(log {\em w\/}), whereas counting
networks have depth {\em O\/}(log$^2$ {\em w\/}).
Diffracting trees have already been used to implement
highly efficient producer/consumer queues, and we
believe diffraction will prove to be an effective
alternative paradigm to combining and queue-locking in
the design of many concurrent data structures.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; performance",
subject = "{\bf E.1} Data, DATA STRUCTURES. {\bf C.1.2} Computer
Systems Organization, PROCESSOR ARCHITECTURES, Multiple
Data Stream Architectures (Multiprocessors). {\bf
C.2.4} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Distributed Systems.
{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management, Synchronization. {\bf D.4.7} Software,
OPERATING SYSTEMS, Organization and Design, Distributed
systems.",
}
@Article{Anonymous:1996:AI,
author = "Anonymous",
title = "Author Index",
journal = j-TOCS,
volume = "14",
number = "4",
pages = "429--430",
month = nov,
year = "1996",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1996-14-4/p429-author_index/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
subject = "{\bf A.2} General Literature, REFERENCE.",
}
@Article{Birman:1997:EEP,
author = "Kenneth P. Birman",
title = "Editorial: Electronic Publication of {TOCS}",
journal = j-TOCS,
volume = "15",
number = "1",
pages = "1--1",
month = feb,
year = "1997",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-1/p1-birman/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Kontothanassis:1997:SCS,
author = "Leonidas I. Kontothanassis and Robert W. Wisniewski
and Michael L. Scott",
title = "Scheduler-Conscious Synchronization",
journal = j-TOCS,
volume = "15",
number = "1",
pages = "3--40",
month = feb,
year = "1997",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-1/p3-kontothanassis/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; performance; reliability",
subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management, Synchronization. {\bf D.1.3} Software,
PROGRAMMING TECHNIQUES, Concurrent Programming,
Parallel programming. {\bf D.4.1} Software, OPERATING
SYSTEMS, Process Management,
Multiprocessing/multiprogramming/multitasking.",
}
@Article{Kotz:1997:DDM,
author = "David Kotz",
title = "Disk-Directed {I/O} for {MIMD} Multiprocessors",
journal = j-TOCS,
volume = "15",
number = "1",
pages = "41--74",
month = feb,
year = "1997",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-1/p41-kotz/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; experimentation; performance",
subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
Management, Access methods. {\bf D.4.3} Software,
OPERATING SYSTEMS, File Systems Management, File
organization. {\bf D.4.8} Software, OPERATING SYSTEMS,
Performance, Simulation. {\bf E.5} Data, FILES.",
}
@Article{Steenkiste:1997:HSN,
author = "Peter Steenkiste",
title = "A High-Speed Network Interface for Distributed-Memory
Systems: Architecture and Applications",
journal = j-TOCS,
volume = "15",
number = "1",
pages = "75--109",
month = feb,
year = "1997",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-1/p75-steenkiste/",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "management; performance; reliability",
subject = "{\bf D.4.4} Software, OPERATING SYSTEMS,
Communications Management, Network communication. {\bf
B.4.3} Hardware, INPUT/OUTPUT AND DATA COMMUNICATIONS,
Interconnections (Subsystems), Interfaces. {\bf C.0}
Computer Systems Organization, GENERAL, System
architectures. {\bf C.2.2} Computer Systems
Organization, COMPUTER-COMMUNICATION NETWORKS, Network
Protocols, Protocol architecture. {\bf D.4.4} Software,
OPERATING SYSTEMS, Communications Management,
Buffering.",
}
@Article{Anderson:1997:DRA,
author = "David P. Anderson",
title = "Device Reservation in Audio\slash Video Editing
Systems",
journal = j-TOCS,
volume = "15",
number = "2",
pages = "111--133",
month = may,
year = "1997",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-2/p111-anderson/",
abstract = "What fraction of disks and other shared devices must
be reserved to play an audio/video document without
dropouts? In general, this question cannot be answered
precisely. For documents with complex and irregular
structure, such as those arising in audio/video
editing, it is difficult even to give a good estimate.
We describe three approaches to this problem. The
first, based on long-term average properties of
segments, is fast but imprecise: it underreserves in
some cases and overreserves in others. The second
approach models individual disk and network operations.
It is precise but slow. The third approach, a hybrid,
is both precise and fast.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; performance",
subject = "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization
and Design, Real-time systems and embedded systems.
{\bf D.4.8} Software, OPERATING SYSTEMS, Performance,
Modeling and prediction. {\bf H.5.1} Information
Systems, INFORMATION INTERFACES AND PRESENTATION,
Multimedia Information Systems, Audio input/output.
{\bf H.5.1} Information Systems, INFORMATION INTERFACES
AND PRESENTATION, Multimedia Information Systems, Video
(e.g., tape, disk, DVI).",
}
@Article{Anderson:1997:RTC,
author = "James H. Anderson and Srikanth Ramamurthy and Kevin
Jeffay",
title = "Real-time Computing with Lock-Free Shared Objects",
journal = j-TOCS,
volume = "15",
number = "2",
pages = "134--165",
month = may,
year = "1997",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-2/p134-anderson/",
abstract = "This article considers the use of lock-free shared
objects within hard real-time systems. As the name
suggests, {\em lock-free\/} shared objects are
distinguished by the fact that they are accessed
without locking. As such, they do not give rise to
priority inversions, a key advantage over conventional,
lock-based object-sharing approaches. Despite this
advantage, it is not immediately apparent that
lock-free shared objects can be employed if tasks must
adhere to strict timing constraints. In particular,
lock-free object implementations permit concurrent
operations to interfere with each other, and repeated
interferences can cause a given operation to take an
arbitrarily long time to complete. The main
contribution of this article is to show that such
interferences can be bounded by judicious scheduling.
This work pertains to periodic, hard real-time tasks
that share lock-free objects on a uniprocessor. In the
first part of the article, scheduling conditions are
derived for such tasks, for both static and dynamic
priority schemes. Based on these conditions, it is
formally shown that lock-free shared objects often
incur less overhead than object implementations based
on wait-free algorithms or lock-based schemes. In the
last part of the article, this conclusion is validated
experimentally through work involving a real-time
desktop videoconferencing system.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; experimentation; performance; theory",
subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management, Concurrency. {\bf C.3} Computer Systems
Organization, SPECIAL-PURPOSE AND APPLICATION-BASED
SYSTEMS, Real-time and embedded systems. {\bf D.4.1}
Software, OPERATING SYSTEMS, Process Management,
Multiprocessing/multiprogramming/multitasking. {\bf
D.4.1} Software, OPERATING SYSTEMS, Process Management,
Mutual exclusion. {\bf D.4.1} Software, OPERATING
SYSTEMS, Process Management, Scheduling. {\bf D.4.1}
Software, OPERATING SYSTEMS, Process Management,
Synchronization. {\bf J.7} Computer Applications,
COMPUTERS IN OTHER SYSTEMS, Real time.",
}
@Article{Mahmood:1997:OAM,
author = "Ausif Mahmood and Donald J. Lynch and Roger B.
Shaffer",
title = "Optimally Adaptive, Minimum-Distance, Circuit-Switched
Routing in Hypercubes",
journal = j-TOCS,
volume = "15",
number = "2",
pages = "166--193",
month = may,
year = "1997",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-2/p166-mahmood/",
abstract = "In circuit-switched routing, the path between a source
and its destination is established by incrementally
reserving all required links before the data
transmission can begin. If the routing algorithm is not
carefully designed, deadlocks can occur in reserving
these links. Deadlock-free algorithms based on
dimension-ordered routing, such as the {\em E-cube},
exist. However, {\em E-cube\/} does not provide any
flexibility in choosing a path from a source to its
destination and can thus result in long latencies under
heavy or uneven traffic. Adaptive, minimum-distance
routing algorithms, such as the {\em Turn Model\/} and
the {\em UP Preference\/} algorithms, have previously
been reported. In this article, we present a new class
of adaptive, provably deadlock-free, minimum-distance
routing algorithms. We prove that the algorithms
developed here are optimally adaptive in the sense that
any further flexibility in communication will result in
deadlock. We show that the {\em Turn Model\/} is
actually a member of our new class of algorithms that
does not perform as well as other algorithms within the
new class. It creates artificial hotspots in routing
the traffic and allows fewer total paths. We present an
analytical comparison of the flexibility and balance in
routing provided by various algorithms and a comparison
based on uniform and nonuniform traffic simulations.
The {\em Extended UP Preference\/} algorithm developed
in this article is shown to have improved performance
with respect to existing algorithms. The methodology
and the algorithms developed here can be used to
develop routing for other schemes such as wormhole
routing, and for other recursively defined networks
such as {\em k\/}-ary {\em n\/}-cubes.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; theory",
subject = "{\bf C.2.1} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Architecture
and Design, Network communications. {\bf C.1.2}
Computer Systems Organization, PROCESSOR ARCHITECTURES,
Multiple Data Stream Architectures (Multiprocessors),
Interconnection architectures. {\bf C.2.1} Computer
Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
Network Architecture and Design, Network topology.",
}
@Article{Pfitzmann:1997:SLT,
author = "Birgit Pfitzmann and Michael Waidner",
title = "Strong Loss Tolerance of Electronic Coin Systems",
journal = j-TOCS,
volume = "15",
number = "2",
pages = "194--213",
month = may,
year = "1997",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-2/p194-pfitzmann/",
abstract = "Untraceable electronic cash means prepaid digital
payment systems, usually with offline payments, that
protect user privacy. Such systems have recently been
given considerable attention by both theory and
development projects. However, in most current schemes,
loss of a user device containing electronic cash
implies a loss of money, just as with real cash. In
comparison with credit schemes, this is considered a
serious shortcoming. This article shows how untraceable
electronic cash can be made loss tolerant, i.e., how
the monetary value of the lost data can be recovered.
Security against fraud and preservation of privacy are
ensured; strong loss tolerance means that not even
denial of recovery is possible. In particular, systems
based on electronic coins are treated. We present
general design principles and options and their
instantiation in one concrete payment system. The
measures are practical.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; reliability; security",
subject = "{\bf D.4.6} Software, OPERATING SYSTEMS, Security and
Protection, Cryptographic controls. {\bf C.2.4}
Computer Systems Organization, COMPUTER-COMMUNICATION
NETWORKS, Distributed Systems, Distributed
applications. {\bf D.4.5} Software, OPERATING SYSTEMS,
Reliability, Fault-tolerance. {\bf H.4.3} Information
Systems, INFORMATION SYSTEMS APPLICATIONS,
Communications Applications. {\bf K.6.5} Computing
Milieux, MANAGEMENT OF COMPUTING AND INFORMATION
SYSTEMS, Security and Protection. {\bf K.4.0} Computing
Milieux, COMPUTERS AND SOCIETY, General.",
}
@Article{Mogul:1997:ERL,
author = "Jeffrey C. Mogul and K. K. Ramakrishnan",
title = "Eliminating Receive Livelock in an Interrupt-Driven
Kernel",
journal = j-TOCS,
volume = "15",
number = "3",
pages = "217--252",
month = aug,
year = "1997",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-3/p217-mogul/",
abstract = "Most operating systems use interface interrupts to
schedule network tasks. Interrupt-driven systems can
provide low overhead and good latency at low offered
load, but degrade significantly at higher arrival rates
unless care is taken to prevent several pathologies.
These are various forms of {\bf receive livelock}, in
which the system spends all of its time processing
interrupts, to the exclusion of other necessary tasks.
Under extreme conditions, no packets are delivered to
the user application or the output of the system. To
avoid livelock and related problems, an operating
system must schedule network interrupt handling as
carefully as it schedules process execution. We
modified an interrupt-driven networking implementation
to do so; this modification eliminates receive livelock
without degrading other aspects of system performance.
Our modifications include the use of polling when the
system is heavily loaded, while retaining the use of
interrupts under lighter load. We present measurements
demonstrating the success of our approach.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "performance",
subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management, Scheduling. {\bf D.4.4} Software, OPERATING
SYSTEMS, Communications Management, Input/output. {\bf
D.4.4} Software, OPERATING SYSTEMS, Communications
Management, Network communication. {\bf C.2.0} Computer
Systems Organization, COMPUTER-COMMUNICATION NETWORKS,
General.",
}
@Article{Harchol-Balter:1997:EPL,
author = "Mor Harchol-Balter and Allen B. Downey",
title = "Exploiting Process Lifetime Distributions for Dynamic
Load Balancing",
journal = j-TOCS,
volume = "15",
number = "3",
pages = "253--285",
month = aug,
year = "1997",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-3/p253-harchol-balter/",
abstract = "We consider policies for CPU load balancing in
networks of workstations. We address the question of
whether preemptive migration (migrating active
processes) is necessary, or whether remote execution
(migrating processes only at the time of birth) is
sufficient for load balancing. We show that resolving
this issue is strongly tied to understanding the
process lifetime distribution. Our measurements
indicate that the distribution of lifetimes for a UNIX
process is Pareto (heavy-tailed), with a consistent
functional form over a variety of workloads. We show
how to apply this distribution to derive a preemptive
migration policy that requires no hand-tuned
parameters. We used a trace-driven simulation to show
that our preemptive migration strategy is far more
effective than remote execution, even when the memory
transfer cost is high.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "algorithms; design; measurement; performance",
subject = "{\bf C.2.3} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Operations,
Network management. {\bf C.2.4} Computer Systems
Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems. {\bf C.4} Computer Systems
Organization, PERFORMANCE OF SYSTEMS. {\bf C.5.3}
Computer Systems Organization, COMPUTER SYSTEM
IMPLEMENTATION, Microcomputers. {\bf G.3} Mathematics
of Computing, PROBABILITY AND STATISTICS. {\bf G.m}
Mathematics of Computing, MISCELLANEOUS. {\bf I.6.0}
Computing Methodologies, SIMULATION AND MODELING,
General. {\bf C.2.3} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Operations,
Network monitoring.",
}
@Article{Krieger:1997:HPO,
author = "Orran Krieger and Michael Stumm",
title = "{HFS}: a Performance-Oriented Flexible File System
Based on Building-Block Compositions",
journal = j-TOCS,
volume = "15",
number = "3",
pages = "286--321",
month = aug,
year = "1997",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 13 18:36:53 MST 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-3/p286-krieger/",
abstract = "The Hurricane File System (HFS) is designed for
(potentially large-scale) shared-memory
multiprocessors. Its architecture is based on the
principle that, in order to maximize performance for
applications with diverse requirements, a file system
must support a wide variety of file structures, file
system policies, and I/O interfaces. Files in HFS are
implemented using simple building blocks composed in
potentially complex ways. This approach yields great
flexibility, allowing an application to customize the
structure and policies of a file to exactly meet its
requirements. As an extreme example, HFS allows a
file's structure to be optimized for concurrent
random-access write-only operations by 10 threads,
something no other file system can do. Similarly, the
prefetching, locking, and file cache management
policies can all be chosen to match an application's
access pattern. In contrast, most parallel file systems
support a single file structure and a small set of
policies. We have implemented HFS as part of the
Hurricane operating system running on the Hector
shared-memory multiprocessor. We demonstrate that the
flexibility of HFS comes with little processing or I/O
overhead. We also show that for a number of file access
patterns, HFS is able to deliver to the applications
the full I/O bandwidth of the disks on our system.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; performance",
subject = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
Management, File organization. {\bf D.4.3} Software,
OPERATING SYSTEMS, File Systems Management, Access
methods. {\bf D.4.8} Software, OPERATING SYSTEMS,
Performance, Measurements. {\bf E.5} Data, FILES,
Optimization**. {\bf E.5} Data, FILES,
Organization/structure.",
}
@Article{Lo:1997:CTL,
  author          = "Jack L. Lo and Joel S. Emer and Henry M. Levy and
                     Rebecca L. Stamm and Dean M. Tullsen",
  title           = "Converting Thread-Level Parallelism to
                     Instruction-Level Parallelism via Simultaneous
                     Multithreading",
  journal         = j-TOCS,
  volume          = "15",
  number          = "3",
  pages           = "322--354",
  month           = aug,
  year            = "1997",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-3/p322-lo/",
  abstract        = "To achieve high performance, contemporary computer
                     systems rely on two forms of parallelism:
                     instruction-level parallelism (ILP) and thread-level
                     parallelism (TLP). Wide-issue super-scalar processors
                     exploit ILP by executing multiple instructions from a
                     single program in a single cycle. Multiprocessors (MP)
                     exploit TLP by executing different threads in parallel
                     on different processors. Unfortunately, both parallel
                     processing styles statically partition processor
                     resources, thus preventing them from adapting to
                     dynamically changing levels of ILP and TLP in a
                     program. With insufficient TLP, processors in an MP
                     will be idle; with insufficient ILP, multiple-issue
                     hardware on a superscalar is wasted. This article
                     explores parallel processing on an alternative
                     architecture, simultaneous multithreading (SMT), which
                     allows multiple threads to compete for and share all
                     of the processor's resources every cycle. The most
                     compelling reason for running parallel applications on
                     an SMT processor is its ability to use thread-level
                     parallelism and instruction-level parallelism
                     interchangeably. By permitting multiple threads to
                     share the processor's functional units simultaneously,
                     the processor can use both ILP and TLP to accommodate
                     variations in parallelism. When a program has only a
                     single thread, all of the SMT processor's resources can
                     be dedicated to that thread; when more TLP exists, this
                     parallelism can compensate for a lack of per-thread
                     ILP. We examine two alternative on-chip parallel
                     architectures for the next generation of processors. We
                     compare SMT and small-scale, on-chip multiprocessors in
                     their ability to exploit both ILP and TLP. First, we
                     identify the hardware bottlenecks that prevent
                     multiprocessors from effectively exploiting ILP. Then,
                     we show that because of its dynamic resource sharing,
                     SMT avoids these inefficiencies and benefits from being
                     able to run more threads on a single processor. The use
                     of TLP is especially advantageous when per-thread ILP
                     is limited. The ease of adding additional thread
                     contexts on an SMT (relative to adding additional
                     processors on an MP) allows simultaneous multithreading
                     to expose more parallelism, further increasing
                     functional unit utilization and attaining a 52\%
                     average speedup (versus a four-processor, single-chip
                     multiprocessor with comparable execution resources).
                     This study also addresses an often-cited concern
                     regarding the use of thread-level parallelism or
                     multithreading: interference in the memory system and
                     branch prediction hardware. We find the multiple
                     threads cause interthread interference in the caches
                     and place greater demands on the memory system, thus
                     increasing average memory latencies. By exploiting
                     threading-level parallelism, however, SMT hides these
                     additional latencies, so that they only have a small
                     impact on total program performance. We also find that
                     for parallel applications, the additional threads have
                     minimal effects on branch prediction.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "measurement; performance",
  subject         = "{\bf C.1.2} Computer Systems Organization, PROCESSOR
                     ARCHITECTURES, Multiple Data Stream Architectures
                     (Multiprocessors), Parallel processors**. {\bf C.0}
                     Computer Systems Organization, GENERAL, Instruction set
                     design. {\bf D.4.1} Software, OPERATING SYSTEMS,
                     Process Management.",
}
@Article{Levy:1997:GE,
  author          = "Henry M. Levy",
  title           = "Guest Editorial",
  journal         = j-TOCS,
  volume          = "15",
  number          = "4",
  pages           = "355--356",
  month           = nov,
  year            = "1997",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-4/p355-levy/",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Anderson:1997:CPW,
  author          = "Jennifer M. Anderson and Lance M. Berc and Jeffrey
                     Dean and Sanjay Ghemawat and Monika R. Henzinger and
                     Shun-Tak A. Leung and Richard L. Sites and Mark T.
                     Vandevoorde and Carl A. Waldspurger and William E.
                     Weihl",
  title           = "Continuous Profiling: Where Have All the Cycles
                     Gone?",
  journal         = j-TOCS,
  volume          = "15",
  number          = "4",
  pages           = "357--390",
  month           = nov,
  year            = "1997",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-4/p357-anderson/",
  abstract        = "This article describes the Digital Continuous
                     Profiling Infrastructure, a sampling-based profiling
                     system designed to run continuously on production
                     systems. The system supports multiprocessors, works on
                     unmodified executables, and collects profiles for
                     entire systems, including user programs, shared
                     libraries, and the operating system kernel. Samples are
                     collected at a high rate (over 5200 samples/sec. per
                     333MHz processor), yet with low overhead (1--3\%
                     slowdown for most workloads). Analysis tools supplied
                     with the profiling system use the sample data to
                     produce a precise and accurate accounting, down to the
                     level of pipeline stalls incurred by individual
                     instructions, of where time is being spent. When
                     instructions incur stalls, the tools identify possible
                     reasons, such as cache misses, branch mispredictions,
                     and functional unit contention. The fine-grained
                     instruction-level analysis guides users and automated
                     optimizers to the causes of performance problems and
                     provides important insights for fixing them.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "performance",
  subject         = "{\bf C.4} Computer Systems Organization, PERFORMANCE
                     OF SYSTEMS, Performance attributes. {\bf D.2.2}
                     Software, SOFTWARE ENGINEERING, Design Tools and
                     Techniques. {\bf D.2.6} Software, SOFTWARE ENGINEERING,
                     Programming Environments. {\bf D.4.7} Software,
                     OPERATING SYSTEMS, Organization and Design. {\bf D.4.8}
                     Software, OPERATING SYSTEMS, Performance. {\bf D.4.0}
                     Software, OPERATING SYSTEMS, General.",
}
@Article{Savage:1997:EDD,
  author          = "Stefan Savage and Michael Burrows and Greg Nelson and
                     Patrick Sobalvarro and Thomas Anderson",
  title           = "{Eraser}: a Dynamic Data Race Detector for
                     Multithreaded Programs",
  journal         = j-TOCS,
  volume          = "15",
  number          = "4",
  pages           = "391--411",
  month           = nov,
  year            = "1997",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-4/p391-savage/",
  abstract        = "Multithreaded programming is difficult and error
                     prone. It is easy to make a mistake in synchronization
                     that produces a data race, yet it can be extremely hard
                     to locate this mistake during debugging. This article
                     describes a new tool, called Eraser, for dynamically
                     detecting data races in lock-based multithreaded
                     programs. Eraser uses binary rewriting techniques to
                     monitor every shared-memory reference and verify that
                     consistent locking behavior is observed. We present
                     several case studies, including undergraduate
                     coursework and a multithreaded Web search engine, that
                     demonstrate the effectiveness of this approach.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "algorithms; experimentation; reliability",
  subject         = "{\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing
                     and Debugging, Monitors. {\bf D.1.3} Software,
                     PROGRAMMING TECHNIQUES, Concurrent Programming,
                     Parallel programming. {\bf D.2.5} Software, SOFTWARE
                     ENGINEERING, Testing and Debugging, Debugging aids.
                     {\bf D.2.5} Software, SOFTWARE ENGINEERING, Testing and
                     Debugging, Tracing. {\bf D.4.1} Software, OPERATING
                     SYSTEMS, Process Management, Concurrency. {\bf D.4.1}
                     Software, OPERATING SYSTEMS, Process Management,
                     Deadlocks. {\bf D.4.1} Software, OPERATING SYSTEMS,
                     Process Management,
                     Multiprocessing/multiprogramming/multitasking. {\bf
                     D.4.1} Software, OPERATING SYSTEMS, Process Management,
                     Mutual exclusion.",
}
@Article{Bugnion:1997:DRC,
  author          = "Edouard Bugnion and Scott Devine and Kinshuk Govil and
                     Mendel Rosenblum",
  title           = "{Disco}: Running Commodity Operating Systems on
                     Scalable Multiprocessors",
  journal         = j-TOCS,
  volume          = "15",
  number          = "4",
  pages           = "412--447",
  month           = nov,
  year            = "1997",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1997-15-4/p412-bugnion/",
  abstract        = "In this article we examine the problem of extending
                     modern operating systems to run efficiently on
                     large-scale shared-memory multiprocessors without a
                     large implementation effort. Our approach brings back
                     an idea popular in the 1970s: virtual machine monitors.
                     We use virtual machines to run multiple commodity
                     operating systems on a scalable multiprocessor. This
                     solution addresses many of the challenges facing the
                     system software for these machines. We demonstrate our
                     approach with a prototype called Disco that runs
                     multiple copies of Silicon Graphics' IRIX operating
                     system on a multiprocessor. Our experience shows that
                     the overheads of the monitor are small and that the
                     approach provides scalability as well as the ability to
                     deal with the nonuniform memory access time of these
                     systems. To reduce the memory overheads associated with
                     running multiple operating systems, virtual machines
                     transparently share major data structures such as the
                     program code and the file system buffer cache. We use
                     the distributed-system support of modern operating
                     systems to export a partial single system image to the
                     users. The overall solution achieves most of the
                     benefits of operating systems customized for scalable
                     multiprocessors, yet it can be achieved with a
                     significantly smaller implementation effort.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "design; experimentation; performance",
  subject         = "{\bf D.4.7} Software, OPERATING SYSTEMS, Organization
                     and Design. {\bf C.1.2} Computer Systems Organization,
                     PROCESSOR ARCHITECTURES, Multiple Data Stream
                     Architectures (Multiprocessors), Parallel
                     processors**.",
}
@Article{Bal:1998:PEO,
  author          = "Henri E. Bal and Raoul Bhoedjang and Rutger Hofman and
                     Ceriel Jacobs and Koen Langendoen and Tim R{\"u}hl and
                     M. Frans Kaashoek",
  title           = "Performance Evaluation of the {Orca} Shared-Object
                     System",
  journal         = j-TOCS,
  volume          = "16",
  number          = "1",
  pages           = "1--40",
  month           = feb,
  year            = "1998",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-1/p1-bal/",
  abstract        = "Orca is a portable, object-based distributed shared
                     memory (DSM) system. This article studies and evaluates
                     the design choices made in the Orca system and compares
                     Orca with other DSMs. The article gives a quantitative
                     analysis of Orca's coherence protocol (based on
                     write-updates with function shipping), the totally
                     ordered group communication protocol, the strategy for
                     object placement, and the all-software, user-space
                     architecture. Performance measurements for 10 parallel
                     applications illustrate the trade-offs made in the
                     design of Orca and show that essentially the right
                     design decisions have been made. A write-update
                     protocol with function shipping is effective for Orca,
                     especially since it is used in combination with
                     techniques that avoid replicating objects that have a
                     low read/write ratio. The overhead of totally ordered
                     group communication on application performance is low.
                     The Orca system is able to make near-optimal decisions
                     for object placement and replication. In addition, the
                     article compares the performance of Orca with that of a
                     page-based DSM (TreadMarks) and another object-based
                     DSM (CRL). It also analyzes the communication overhead
                     of the DSMs for several applications. All performance
                     measurements are done on a 32-node Pentium Pro cluster
                     with Myrinet and Fast Ethernet networks. The results
                     show that Orca programs send fewer messages and less
                     data than the TreadMarks and CRL programs and obtain
                     better speedups.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "design; languages; performance",
  subject         = "{\bf D.3.4} Software, PROGRAMMING LANGUAGES,
                     Processors, Run-time environments. {\bf D.1.3}
                     Software, PROGRAMMING TECHNIQUES, Concurrent
                     Programming, Distributed programming. {\bf D.1.3}
                     Software, PROGRAMMING TECHNIQUES, Concurrent
                     Programming, Parallel programming. {\bf D.3.2}
                     Software, PROGRAMMING LANGUAGES, Language
                     Classifications, Concurrent, distributed, and parallel
                     languages. {\bf D.3.4} Software, PROGRAMMING LANGUAGES,
                     Processors, Compilers.",
}
@Article{Derk:1998:RFT,
  author          = "M. D. Derk and L. S. DeBrunner",
  title           = "Reconfiguration for Fault Tolerance Using Graph
                     Grammars",
  journal         = j-TOCS,
  volume          = "16",
  number          = "1",
  pages           = "41--54",
  month           = feb,
  year            = "1998",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-1/p41-derk/",
  abstract        = "Reconfiguration for fault tolerance is a widely
                     studied field, but this work applies graph grammars to
                     this discipline for the first time. Reconfiguration
                     Graph Grammars (RGG) are defined and applied to the
                     definition of processor array reconfiguration
                     algorithms. The nodes of a graph are associated with
                     the processors of a processor array, and the edges are
                     associated with those interprocessor communication
                     lines that are active. The resulting algorithms for
                     dynamic (run-time) reconfiguration are efficient and
                     can be implemented distributively.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "algorithms; design; reliability; theory",
  subject         = "{\bf C.4} Computer Systems Organization, PERFORMANCE
                     OF SYSTEMS, Reliability, availability, and
                     serviceability. {\bf F.4.2} Theory of Computation,
                     MATHEMATICAL LOGIC AND FORMAL LANGUAGES, Grammars and
                     Other Rewriting Systems. {\bf C.1.2} Computer Systems
                     Organization, PROCESSOR ARCHITECTURES, Multiple Data
                     Stream Architectures (Multiprocessors),
                     Multiple-instruction-stream, multiple-data-stream
                     processors (MIMD).",
}
@Article{Mowry:1998:TLM,
  author          = "Todd C. Mowry",
  title           = "Tolerating Latency in Multiprocessors through
                     Compiler-Inserted Prefetching",
  journal         = j-TOCS,
  volume          = "16",
  number          = "1",
  pages           = "55--92",
  month           = feb,
  year            = "1998",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-1/p55-mowry/",
  abstract        = "The large latency of memory accesses in large-scale
                     shared-memory multiprocessors is a key obstacle to
                     achieving high processor utilization. {\em
                     Software-controlled prefetching\/} is a technique for
                     tolerating memory latency by explicitly executing
                     instructions to move data close to the processor before
                     the data are actually needed. To minimize the burden on
                     the programmer, compiler support is needed to
                     automatically insert prefetch instructions into the
                     code. A key challenge when inserting prefetches is
                     ensuring that the overheads of prefetching do not
                     outweigh the benefits. While previous studies have
                     demonstrated the effectiveness of hand-inserted
                     prefetching in multiprocessor applications, the benefit
                     of {\em compiler-inserted\/} prefetching in practice
                     has remained an open question. This article proposes
                     and evaluates a new compiler algorithm for inserting
                     prefetches into multiprocessor code. The proposed
                     algorithm attempts to minimize overheads by only
                     issuing prefetches for references that are predicted to
                     suffer cache misses. The algorithm can prefetch both
                     dense-matrix and sparse-matrix codes, thus covering a
                     large fraction of scientific applications. We have
                     implemented our algorithm in the SUIF (Stanford
                     University Intermediate Format) optimizing compiler.
                     The results of our detailed architectural simulations
                     demonstrate that compiler-inserted prefetching can
                     improve the speed of some parallel applications by as
                     much as a factor of two.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "design; experimentation; performance",
  subject         = "{\bf D.3.4} Software, PROGRAMMING LANGUAGES,
                     Processors, Optimization. {\bf B.3.2} Hardware, MEMORY
                     STRUCTURES, Design Styles, Cache memories. {\bf D.3.4}
                     Software, PROGRAMMING LANGUAGES, Processors,
                     Compilers.",
}
@Article{Agarwal:1998:TMR,
  author          = "D. A. Agarwal and L. E. Moser and P. M. Melliar-Smith
                     and R. K. Budhia",
  title           = "The {Totem} Multiple-Ring Ordering and Topology
                     Maintenance Protocol",
  journal         = j-TOCS,
  volume          = "16",
  number          = "2",
  pages           = "93--132",
  month           = may,
  year            = "1998",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-2/p93-agarwal/",
  abstract        = "The Totem multiple-ring protocol provides reliable
                     totally ordered delivery of messages across multiple
                     local-area networks interconnected by gateways. This
                     consistent message order is maintained in the presence
                     of network partitioning and remerging, and of processor
                     failure and recovery. The protocol provides accurate
                     topology change information as part of the global total
                     order of messages. It addresses the issue of
                     scalability and achieves a latency that increases
                     logarithmically with system size by exploiting process
                     group locality and selective forwarding of messages
                     through the gateways. Pseudocode for the protocol and
                     an evaluation of its performance are given. ---Authors'
                     Abstract",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "algorithms; performance; reliability",
  subject         = "{\bf C.2.2} Computer Systems Organization,
                     COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
                     Protocol architecture. {\bf C.2.1} Computer Systems
                     Organization, COMPUTER-COMMUNICATION NETWORKS, Network
                     Architecture and Design, Network communications. {\bf
                     C.2.4} Computer Systems Organization,
                     COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
                     Network operating systems. {\bf C.4} Computer Systems
                     Organization, PERFORMANCE OF SYSTEMS, Fault
                     tolerance.",
}
@Article{Lamport:1998:PTP,
  author          = "Leslie Lamport",
  title           = "The Part-Time Parliament",
  journal         = j-TOCS,
  volume          = "16",
  number          = "2",
  pages           = "133--169",
  month           = may,
  year            = "1998",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-2/p133-lamport/",
  abstract        = "Recent archaeological discoveries on the island of
                     Paxos reveal that the parliament functioned despite the
                     peripatetic propensity of its part-time legislators.
                     The legislators maintained consistent copies of the
                     parliamentary record, despite their frequent forays
                     from the chamber and the forgetfulness of their
                     messengers. The Paxon parliament's protocol provides a
                     new way of implementing the state machine approach to
                     the design of distributed systems.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "design; reliability",
  subject         = "{\bf C.2.4} Computer Systems Organization,
                     COMPUTER-COMMUNICATION NETWORKS, Distributed Systems,
                     Network operating systems. {\bf D.4.5} Software,
                     OPERATING SYSTEMS, Reliability, Fault-tolerance. {\bf
                     J.1} Computer Applications, ADMINISTRATIVE DATA
                     PROCESSING, Government.",
}
@Article{Horowitz:1998:IMO,
  author          = "Mark Horowitz and Margaret Martonosi and Todd C.
                     Mowry and Michael D. Smith",
  title           = "Informing Memory Operations: Memory Performance
                     Feedback Mechanisms and Their Applications",
  journal         = j-TOCS,
  volume          = "16",
  number          = "2",
  pages           = "170--205",
  month           = may,
  year            = "1998",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-2/p170-horowitz/",
  abstract        = "Memory latency is an important bottleneck in system
                     performance that cannot be adequately solved by
                     hardware alone. Several promising software techniques
                     have been shown to address this problem successfully in
                     specific situations. However, the generality of these
                     software approaches has been limited because current
                     architectures do not provide a fine-grained,
                     low-overhead mechanism for observing and reacting to
                     memory behavior directly. To fill this need, this
                     article proposes a new class of memory operations
                     called {\em informing memory operations}, which
                     essentially consist of a memory operation combined
                     (either implicitly or explicitly) with a conditional
                     branch-and-link operation that is taken only if the
                     reference suffers a cache miss. This article describes
                     two different implementations of informing memory
                     operations. One is based on a {\em cache-outcome
                     condition code}, and the other is based on {\em
                     low-overhead traps.\/} We find that modern
                     in-order-issue and out-of-order-issue superscalar
                     processors already contain the bulk of the necessary
                     hardware support. We describe how a number of
                     software-based memory optimizations can exploit
                     informing memory operations to enhance performance, and
                     we look at cache coherence with fine-grained access
                     control as a case study. Our performance results
                     demonstrate that the runtime overhead of invoking the
                     informing mechanism on the Alpha 21164 and MIPS R10000
                     processors is generally small enough to provide
                     considerable flexibility to hardware and software
                     designers, and that the cache coherence application has
                     improved performance compared to other current
                     solutions. We believe that the inclusion of informing
                     memory operations in future processors may spur even
                     more innovative performance optimizations.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "design; experimentation; performance",
  subject         = "{\bf B.3.2} Hardware, MEMORY STRUCTURES, Design
                     Styles, Cache memories. {\bf C.4} Computer Systems
                     Organization, PERFORMANCE OF SYSTEMS, Measurement
                     techniques. {\bf D.3.4} Software, PROGRAMMING
                     LANGUAGES, Processors, Compilers. {\bf B.8.2} Hardware,
                     PERFORMANCE AND RELIABILITY, Performance Analysis and
                     Design Aids.",
}
@Article{Alexandrov:1998:UPG,
  author          = "Albert D. Alexandrov and Maximilian Ibel and Klaus E.
                     Schauser and Chris J. Scheiman",
  title           = "{Ufo}: a Personal Global File System Based on
                     User-Level Extensions to the Operating System",
  journal         = j-TOCS,
  volume          = "16",
  number          = "3",
  pages           = "207--233",
  month           = aug,
  year            = "1998",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-3/p207-alexandrov/",
  abstract        = "In this article we show how to extend a wide range of
                     functionality of standard operating systems completely
                     at the user level. Our approach works by intercepting
                     selected system calls at the user level, using tracing
                     facilities such as the /proc file system provided by
                     many Unix operating systems. The behavior of some
                     intercepted system calls is then modified to implement
                     new functionality. This approach does not require any
                     relinking or recompilation of existing applications. In
                     fact, the extensions can even be dynamically
                     ``installed'' into already running processes. The
                     extensions work completely at the user level and
                     install without system administrator assistance.
                     Individual users can choose what extensions to run, in
                     effect creating a personalized operating system view
                     for themselves. We used this approach to implement a
                     global file system, called Ufo, which allows users to
                     treat remote files exactly as if they were local.
                     Currently, Ufo supports file access through the FTP and
                     HTTP protocols and allows new protocols to be plugged
                     in. While several other projects have implemented
                     global file system abstractions, they all require
                     either changes to the operating system or modifications
                     to standard libraries. The article gives a detailed
                     performance analysis of our approach to extending the
                     OS and establishes that Ufo introduces acceptable
                     overhead for common applications even though
                     intercepting individual system calls incurs a high
                     cost.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "performance",
  subject         = "{\bf D.4.3} Software, OPERATING SYSTEMS, File Systems
                     Management. {\bf D.4.3} Software, OPERATING SYSTEMS,
                     File Systems Management, Distributed file systems. {\bf
                     D.4.3} Software, OPERATING SYSTEMS, File Systems
                     Management, Access methods.",
}
@Article{Gabbay:1998:UVP,
  author          = "Freddy Gabbay and Avi Mendelson",
  title           = "Using Value Prediction to Increase the Power of
                     Speculative Execution Hardware",
  journal         = j-TOCS,
  volume          = "16",
  number          = "3",
  pages           = "234--270",
  month           = aug,
  year            = "1998",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-3/p234-gabbay/",
  abstract        = "This article presents an experimental and analytical
                     study of value prediction and its impact on speculative
                     execution in superscalar microprocessors. Value
                     prediction is a new paradigm that suggests predicting
                     outcome values of operations (at run-time) and using
                     these predicted values to trigger the execution of
                     true-data-dependent operations speculatively. As a
                     result, stalls to memory locations can be reduced and
                     the amount of instruction-level parallelism can be
                     extended beyond the limits of the program's dataflow
                     graph. This article examines the characteristics of the
                     value prediction concept from two perspectives: (1) the
                     related phenomena that are reflected in the nature of
                     computer programs and (2) the significance of these
                     phenomena to boosting instruction-level parallelism of
                     superscalar microprocessors that support speculative
                     execution. In order to better understand these
                     characteristics, our work combines both analytical and
                     experimental studies.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "design; experimentation; measurement; performance",
  subject         = "{\bf C.0} Computer Systems Organization, GENERAL,
                     System architectures. {\bf C.1.1} Computer Systems
                     Organization, PROCESSOR ARCHITECTURES, Single Data
                     Stream Architectures, RISC. {\bf C.5.3} Computer
                     Systems Organization, COMPUTER SYSTEM IMPLEMENTATION,
                     Microcomputers, Microprocessors. {\bf C.0} Computer
                     Systems Organization, GENERAL, Instruction set
                     design.",
}
@Article{Juurlink:1998:QCP,
  author          = "Ben H. H. Juurlink and Harry A. G. Wijshoff",
  title           = "A Quantitative Comparison of Parallel Computation
                     Models",
  journal         = j-TOCS,
  volume          = "16",
  number          = "3",
  pages           = "271--318",
  month           = aug,
  year            = "1998",
  CODEN           = "ACSYEC",
  ISSN            = "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L          = "0734-2071",
  bibdate         = "Wed Jan 13 18:36:53 MST 1999",
  bibsource       = "http://www.acm.org/pubs/contents/journals/tocs/;
                     https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL             = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-3/p271-juurlink/",
  abstract        = "In recent years, a large number of parallel
                     computation models have been proposed to replace the
                     PRAM as the parallel computation model presented to the
                     algorithm designer. Although mostly the theoretical
                     justifications for these models are sound, and many
                     algorithmic results were obtained through these
                     models, little experimentation has been conducted to
                     validate the effectiveness of these models for
                     developing cost-effective algorithms and applications
                     on existing hardware platforms. In this article a first
                     attempt is made to perform a detailed experimental
                     account on the preciseness of these models. To achieve
                     this, three models (BSP, E-BSP, and BPRAM) were
                     selected and validated on five parallel platforms (Cray
                     T3E, Thinking Machines CM-5, Intel Paragon, MasPar
                     MP-1, and Parsytec GCel). The work described in this
                     article consists of three parts. First, the predictive
                     capabilities of the models are investigated. Unlike
                     previous experimental work, which mostly demonstrated a
                     close match between the measured and predicted
                     execution times, this article shows that there are
                     several situations in which the models do not precisely
                     predict the actual runtime behavior of an algorithm
                     implementation. Second, a comparison between the models
                     is provided in order to determine the model that
                     induces the most efficient algorithms. Lastly, the
                     performance achieved by the model-derived algorithms is
                     compared with the performance attained by
                     machine-specific algorithms in order to examine the
                     effectiveness of deriving fast algorithms through the
                     formalisms of the models.",
  acknowledgement = ack-nhfb,
  journal-URL     = "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords        = "experimentation; performance",
  subject         = "{\bf C.1.4} Computer Systems Organization, PROCESSOR
                     ARCHITECTURES, Parallel Architectures. {\bf C.4}
                     Computer Systems Organization, PERFORMANCE OF SYSTEMS,
                     Modeling techniques. {\bf D.1.3} Software, PROGRAMMING
                     TECHNIQUES, Concurrent Programming, Parallel
                     programming.",
}
@Article{Bhatti:1998:CSC,
author = "Nina T. Bhatti and Matti A. Hiltunen and Richard D.
Schlichting and Wanda Chiu",
title = "{Coyote}: a system for constructing fine-grain
configurable communication services",
journal = j-TOCS,
volume = "16",
number = "4",
pages = "321--366",
month = nov,
year = "1998",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jul 26 16:27:34 MDT 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-4/p321-bhatti/",
abstract = "Communication-oriented abstractions such as atomic
multicast, group RPC, and protocols for
location-independent mobile computing can simplify the
development of complex applications built on
distributed systems. This article describes Coyote, a
system that supports the construction of highly modular
and configurable versions of such abstractions. Coyote
extends the notion of protocol objects and hierarchical
composition found in existing systems with support for
finer-grain microprotocol objects and a nonhierarchical
composition scheme for use within a single layer of a
protocol stack. A customized service is constructed by
selecting microprotocols based on their semantic
guarantees and configuring them together with a
standard runtime system to form a composite protocol
implementing the service. This composite protocol is
then composed hierarchically with other protocols to
form a complete network subsystem. The overall approach
is described and illustrated with examples of services
that have been constructed using Coyote, including
atomic multicast, group RPC, membership, and mobile
computing protocols. A prototype implementation based
on extending {\em x\/}-kernel version 3.2 running on
Mach 3.0 with support for microprotocols is also
presented, together with performance results from a
suite of microprotocols from which over 60 variants of
group RPC can be constructed.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "design; experimentation; performance; reliability",
subject = "{\bf C.2.2} Computer Systems Organization,
COMPUTER-COMMUNICATION NETWORKS, Network Protocols,
Protocol architecture. {\bf C.2.4} Computer Systems
Organization, COMPUTER-COMMUNICATION NETWORKS,
Distributed Systems, Distributed applications. {\bf
D.1.3} Software, PROGRAMMING TECHNIQUES, Concurrent
Programming, Distributed programming. {\bf D.4.4}
Software, OPERATING SYSTEMS, Communications Management,
Network communication. {\bf D.4.5} Software, OPERATING
SYSTEMS, Reliability, Fault-tolerance. {\bf D.4.7}
Software, OPERATING SYSTEMS, Organization and Design,
Distributed systems. {\bf D.2.13} Software, SOFTWARE
ENGINEERING, Reusable Software.",
}
@Article{Epema:1998:DUS,
author = "D. H. J. Epema",
title = "Decay-usage scheduling in multiprocessors",
journal = j-TOCS,
volume = "16",
number = "4",
pages = "367--415",
month = nov,
year = "1998",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jul 26 16:27:34 MDT 1999",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org:80/pubs/citations/journals/tocs/1998-16-4/p367-epema/",
abstract = "Decay-usage scheduling is a priority-aging
time-sharing scheduling policy capable of dealing with
a workload of both interactive and batch jobs by
decreasing the priority of a job when it acquires CPU
time, and by increasing its priority when it does not
use the (a) CPU. In this article we deal with a
decay-usage scheduling policy in multiprocessors
modeled after widely used systems. The priority of a
job consists of a base priority and a time-dependent
component based on processor usage. Because the
priorities in our model are time dependent, a
queuing-theoretic analysis---for instance, for the mean
job response time---seems impossible. Still, it turns
out that as a consequence of the scheduling policy, the
shares of the available CPU time obtained by jobs
converge, and a deterministic analysis for these shares
is feasible: We show how for a fixed set of jobs with
large processing demands, the steady-state shares can
be obtained given the base priorities, and conversely,
how to set the base priorities given the required
shares. In addition, we analyze the relation between
the values of the scheduler parameters and the level of
control it can exercise over the steady-state share
ratios, and we deal with the rate of convergence. We
validate the model by simulations and by measurements
of actual systems.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "measurement; performance",
subject = "{\bf D.4.1} Software, OPERATING SYSTEMS, Process
Management,
Multiprocessing/multiprogramming/multitasking. {\bf
D.4.1} Software, OPERATING SYSTEMS, Process Management,
Scheduling. {\bf D.4.8} Software, OPERATING SYSTEMS,
Performance, Measurements. {\bf D.4.8} Software,
OPERATING SYSTEMS, Performance, Modeling and
prediction. {\bf D.4.8} Software, OPERATING SYSTEMS,
Performance, Simulation.",
}
@Article{Srinivasan:1999:FAL,
author = "V. Srinivasan and G. Varghese",
title = "Fast address lookups using controlled prefix
expansion",
journal = j-TOCS,
volume = "17",
number = "1",
pages = "1--40",
month = feb,
year = "1999",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-1/p1-srinivasan/",
abstract = "Internet (IP) address lookup is a major bottleneck in
high-performance routers. IP address lookup is
challenging because it requires a {\em longest matching
prefix\/} lookup. It is compounded by increasing
routing table sizes, increased traffic, higher-speed
links, and the migration to 128-bit IPv6 addresses. We
describe how IP lookups and updates can be made faster
using a set of transformation techniques. Our main
technique, {\em controlled prefix expansion},
transforms a set of prefixes into an equivalent set
with fewer prefix lengths. In addition, we use
optimization techniques based on dynamic programming,
and local transformations of data structures to improve
cache behavior. When applied to trie search, our
techniques provide a range of algorithms ({\em Expanded
Tries\/}) whose performance can be tuned. For example,
using a processor with 1MB of L2 cache, search of the
MaeEast database containing 38000 prefixes can be done
in 3 L2 cache accesses. On a 300MHz Pentium II which
takes 4 cycles for accessing the first word of the L2
cacheline, this algorithm has a worst-case search time
of 180 nsec., a worst-case insert/delete time of 2.5
msec., and an average insert/delete time of 4 usec.
Expanded tries provide faster search {\em and\/} faster
insert/delete times than earlier lookup algorithms.
When applied to Binary Search on Levels, our techniques
improve worst-case search times by nearly a factor of 2
(using twice as much storage) for the MaeEast database.
Our approach to algorithm design is based on
measurements using the VTune tool on a Pentium to
obtain dynamic clock cycle counts. Our techniques also
apply to similar address lookup problems in other
network protocols.",
acknowledgement = ack-nhfb,
generalterms = "Design; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "binary search on levels; controlled prefix expansion;
expanded tries; Internet address lookup; longest-prefix
match; multibit tries; router performance",
subject = "Computer Systems Organization ---
Computer-Communication Networks --- Local and Wide-Area
Networks (C.2.5): {\bf Internet}; Computer Systems
Organization --- Computer-Communication Networks ---
Network Protocols (C.2.2): {\bf Routing protocols};
Computer Systems Organization ---
Computer-Communication Networks --- Internetworking
(C.2.6): {\bf Routers}",
}
@Article{Birman:1999:BM,
author = "Kenneth P. Birman and Mark Hayden and Oznur Ozkasap
and Zhen Xiao and Mihai Budiu and Yaron Minsky",
title = "Bimodal multicast",
journal = j-TOCS,
volume = "17",
number = "2",
pages = "41--88",
month = may,
year = "1999",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-2/p41-birman/",
abstract = "There are many methods for making a multicast protocol
``reliable.'' At one end of the spectrum, a reliable
multicast protocol might offer atomicity guarantees,
such as all-or-nothing delivery, delivery ordering, and
perhaps additional properties such as virtually
synchronous addressing. At the other are protocols that
use local repair to overcome transient packet loss in
the network, offering ``best effort'' reliability. Yet
none of this prior work has treated stability of
multicast delivery as a basic reliability property,
such as might be needed in an internet radio,
television, or conferencing application. This article
looks at reliability with a new goal: development of a
multicast protocol which is reliable in a sense that
can be rigorously quantified and includes throughput
stability guarantees. We characterize this new protocol
as a ``bimodal multicast'' in reference to its
reliability model, which corresponds to a family of
bimodal probability distributions. Here, we introduce
the protocol, provide a theoretical analysis of its
behavior, review experimental results, and discuss some
candidate applications. These confirm that bimodal
multicast is reliable, scalable, and that the protocol
provides remarkably stable delivery throughput.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
subject = "Computer Systems Organization ---
Computer-Communication Networks --- Network
Architecture and Design (C.2.1): {\bf Network
communications}",
}
@Article{Diniz:1999:ESO,
author = "Pedro C. Diniz and Martin C. Rinard",
title = "Eliminating synchronization overhead in automatically
parallelized programs using dynamic feedback",
journal = j-TOCS,
volume = "17",
number = "2",
pages = "89--132",
month = may,
year = "1999",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-2/p89-diniz/",
abstract = "This article presents dynamic feedback, a technique
that enables computations to adapt dynamically to
different execution environments. A compiler that uses
dynamic feedback produces several different versions of
the same source code; each version uses a different
optimization policy. The generated code alternately
performs sampling phases and production phases. Each
sampling phase measures the overhead of each version in
the current environment. Each production phase uses the
version with the least overhead in the previous
sampling phase. The computation periodically resamples
to adjust dynamically to changes in the environment. We
have implemented dynamic feedback in the context of a
parallelizing compiler for object-based programs. The
generated code uses dynamic feedback to automatically
choose the best synchronization optimization policy.
Our experimental results show that the synchronization
optimization policy has a significant impact on the
overall performance of the computation, that the best
policy varies from program to program, that the
compiler is unable to statically choose the best
policy, and that dynamic feedback enables the generated
code to exhibit performance that is comparable to that
of code that has been manually tuned to use the best
policy. We have also performed a theoretical analysis
which provides, under certain assumptions, a guaranteed
optimality bound for dynamic feedback relative to a
hypothetical (and unrealizable) optimal algorithm that
uses the best policy at every point during the
execution.",
acknowledgement = ack-nhfb,
generalterms = "Measurement; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "parallel computing; parallelizing compilers",
subject = "Computer Systems Organization --- Performance of
Systems (C.4): {\bf Measurement techniques}; Software
--- Programming Techniques --- Concurrent Programming
(D.1.3); Software --- Programming Techniques ---
Object-oriented Programming (D.1.5); Software ---
Programming Languages --- Processors (D.3.4): {\bf Code
generation}; Software --- Programming Languages ---
Processors (D.3.4): {\bf Compilers}; Software ---
Programming Languages --- Processors (D.3.4): {\bf
Optimization}; Software --- Programming Languages ---
Processors (D.3.4): {\bf Run-time environments};
Software --- Programming Techniques --- Concurrent
Programming (D.1.3): {\bf Parallel programming};
Computer Systems Organization --- Performance of
Systems (C.4): {\bf Design studies}",
}
@Article{Ronsse:1999:RFI,
author = "Michiel Ronsse and Koen {De Bosschere}",
title = "{RecPlay}: a fully integrated practical record\slash
replay system",
journal = j-TOCS,
volume = "17",
number = "2",
pages = "133--152",
month = may,
year = "1999",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-2/p133-ronsse/",
abstract = "This article presents a practical solution for the
cyclic debugging of nondeterministic parallel programs.
The solution consists of a combination of record\slash
replay with automatic on-the-fly data race detection.
This combination enables us to limit the record phase
to the more efficient recording of the synchronization
operations, while deferring the time-consuming data
race detection to the replay phase. As the record phase
is highly efficient, there is no need to switch it off,
hereby eliminating the possibility of Heisenbugs
because tracing can be left on all the time. This
article describes an implementation of the tools needed
to support RecPlay.",
acknowledgement = ack-nhfb,
generalterms = "Algorithms; Experimentation; Reliability",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "binary code modification; multithreaded programming;
race detection",
subject = "Software --- Programming Techniques --- Concurrent
Programming (D.1.3): {\bf Parallel programming};
Software --- Software Engineering --- Testing and
Debugging (D.2.5): {\bf Debugging aids}; Software ---
Software Engineering --- Testing and Debugging (D.2.5):
{\bf Monitors}; Software --- Software Engineering ---
Testing and Debugging (D.2.5): {\bf Tracing}; Software
--- Operating Systems --- Process Management (D.4.1):
{\bf Concurrency}; Software --- Operating Systems ---
Process Management (D.4.1): {\bf Deadlocks}; Software
--- Operating Systems --- Process Management (D.4.1):
{\bf Multiprocessing/multiprogramming/multitasking};
Software --- Operating Systems --- Process Management
(D.4.1): {\bf Mutual exclusion}; Software --- Operating
Systems --- Process Management (D.4.1): {\bf
Synchronization}",
}
@Article{Amsaleg:1999:GCC,
author = "Laurent Amsaleg and Michael J. Franklin and Olivier
Gruber",
title = "Garbage collection for a client-server persistent
object store",
journal = j-TOCS,
volume = "17",
number = "3",
pages = "153--201",
month = aug,
year = "1999",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-3/p153-amsaleg/",
abstract = "We describe an efficient server-based algorithm for
garbage collecting persistent object stores in a
client-server environment. The algorithm is incremental
and runs concurrently with client transactions. Unlike
previous algorithms, it does not hold any transactional
locks on data and does not require callbacks to
clients. It is fault-tolerant, but performs very little
logging. The algorithm has been designed to be
integrated into existing systems, and therefore it
works with standard implementation techniques such as
Two-Phase Locking and Write-Ahead-Logging. In addition,
it supports client-server performance optimizations
such as client caching and flexible management of
client buffers. We describe an implementation of the
algorithm in the EXODUS storage manager and present the
results of a performance study of the implementation.",
acknowledgement = ack-nhfb,
generalterms = "Algorithms; Measurement; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "client-server system; logging; persistent
object-store; recovery",
subject = "Software --- Operating Systems --- Storage Management
(D.4.2): {\bf Garbage collection}; Information Systems
--- Database Management --- Systems (H.2.4): {\bf
Distributed databases}; Information Systems ---
Database Management --- Systems (H.2.4): {\bf
Object-oriented databases}; Information Systems ---
Database Management --- Systems (H.2.4): {\bf
Transaction processing}",
}
@Article{Raghavachari:1999:ALP,
author = "Mukund Raghavachari and Anne Rogers",
title = "{Ace}: a language for parallel programming with
customizable protocols",
journal = j-TOCS,
volume = "17",
number = "3",
pages = "202--248",
month = aug,
year = "1999",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-3/p202-raghavachari/",
abstract = "Customizing the protocols that manage accesses to
different data structures within an application can
improve the performance of software shared-memory
programs substantially. Existing systems for using
customizable protocols are hard to use directly because
the mechanisms they provide for manipulating protocols
are low-level ones. This article is an in-depth study
of the issues involved in providing language support
for application-specific protocols. We describe the
design and implementation of a new language for
parallel programming, Ace, that integrates support for
customizable protocols with minimal extensions to C.
Ace applications are developed using a shared-memory
model with a default sequentially consistent protocol.
Performance can then be optimized, with minor
modifications to the application, by experimenting with
different protocol libraries. The design of Ace was
driven by a detailed study of the use of customizable
protocols. We delineate the issues that arise when
programming with customizable protocols and present
novel abstractions that allow for their easy use. We
describe the design and implementation of a runtime
system and compiler for Ace and discuss compiler
optimizations that improve the performance of such
software shared-memory systems. We study the
communication patterns of a set of benchmark
applications and consider the use of customizable
protocols to optimize their performance. We evaluate
the performance of our system through experiments on a
Thinking Machines CM-5 and a Cray T3E. We also present
measurements that demonstrate that Ace has good
performance compared to that of a modern distributed
shared-memory system.",
acknowledgement = ack-nhfb,
generalterms = "Design; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "parallel processing",
subject = "Software --- Programming Languages --- Language
Constructs and Features (D.3.3); Software ---
Programming Languages --- Processors (D.3.4): {\bf
Compilers}; Software --- Programming Languages ---
Processors (D.3.4): {\bf Run-time environments};
Software --- Programming Languages --- Language
Classifications (D.3.2); Software --- Programming
Techniques --- Concurrent Programming (D.1.3): {\bf
Parallel programming}",
}
@Article{Hari:1999:APS,
author = "Adiseshu Hari and George Varghese and Guru Parulkar",
title = "An architecture for packet-striping protocols",
journal = j-TOCS,
volume = "17",
number = "4",
pages = "249--287",
month = nov,
year = "1999",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/articles/journals/tocs/1999-17-4/p249-hari/p249-hari.pdf;
http://www.acm.org/pubs/citations/journals/tocs/1999-17-4/p249-hari/",
abstract = "Link-striping algorithms are often used to overcome
transmission bottlenecks in computer networks.
Traditional striping algorithms suffer from two major
disadvantages. They provide inadequate load sharing in
the presence of variable-length packets, and may result
in non-FIFO delivery of data. We describe a new family
of link-striping algorithms that solves both problems.
Our scheme applies to any layer that can provide
multiple FIFO channels. We deal with variable-sized
packets by showing how fair-queuing algorithms can be
transformed into load-sharing algorithms. Our
transformation results in practical load-sharing
protocols, and shows a theoretical connection between
two seemingly different problems. The same
transformation can be applied to obtain load-sharing
protocols for links with different capacities. We deal
with the FIFO requirement for two separate cases. If a
sequence number can be added to each packet, we show
how to speed up packet processing by letting the
receiver simulate the sender algorithm. If no header
can be added, we show how to provide quasi FIFO
delivery. Quasi FIFO is FIFO except during occasional
periods of loss of synchronization. We argue that quasi
FIFO is adequate for most applications. We also
describe a simple technique for speedy restoration of
synchronization in the event of loss. We develop an
architectural framework for transparently embedding our
protocol at the network level by striping IP packets
across multiple physical interfaces. The resulting
stripe protocol has been implemented within the NetBSD
kernel. Our measurements and simulations show that the
protocol offers scalable throughput even when striping
is done over dissimilar links, and that the protocol
synchronizes quickly after packet loss. Measurements
show performance improvements over conventional
round-robin striping schemes and striping schemes that
do not resequence packets. Some aspects of our solution
have been implemented in Cisco's router operating
system (IOS 11.3) in the context of Multilink PPP
striping.",
acknowledgement = ack-nhfb,
generalterms = "Algorithms; Design; Measurement; Performance; Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "causal fair queuing; fair queuing; load sharing;
multilink PPP; packet striping; stripe protocol;
striping",
subject = "Computer Systems Organization ---
Computer-Communication Networks --- Network Protocols
(C.2.2): {\bf Protocol architecture}",
}
@Article{McKinley:1999:QLN,
author = "Kathryn S. McKinley and Olivier Temam",
title = "Quantifying loop nest locality using {SPEC'95} and the
{Perfect} benchmarks",
journal = j-TOCS,
volume = "17",
number = "4",
pages = "288--336",
month = nov,
year = "1999",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-4/p288-mckinley/",
abstract = "This article analyzes and quantifies the locality
characteristics of numerical loop nests in order to
suggest future directions for architecture and software
cache optimizations. Since most programs spend the
majority of their time in nests, the vast majority of
cache optimization techniques target loop nests. In
contrast, the locality characteristics that drive these
optimizations are usually collected across the entire
application rather than at the nest level. Researchers
have studied numerical codes for so long that a number
of commonly held assertions have emerged on their
locality characteristics. In light of these assertions,
we use the SPEC'95 and Perfect Benchmarks to take a new
look at measuring locality on numerical codes based on
references, loop nests, and program locality
properties. Our results show that several popular
assertions are at best overstatements. For example,
although most reuse is within a loop nest, in line with
popular assertions, most misses are internest capacity
misses, and they correspond to potential reuse between
nearby loop nests. In addition, we find that temporal
and spatial reuse have balanced roles within a loop
nest and that most reuse across nests and the entire
program is temporal. These results are consistent with
high hit rates (80\% or more hits), but go against the
commonly held assumption that spatial reuse dominates.
Our locality measurements reveal important differences
between loop nests and programs, refute some popular
assertions, and provide new insights for the compiler
writer and the architect.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
subject = "Computer Systems Organization --- Performance of
Systems (C.4): {\bf Performance attributes}; Computer
Systems Organization --- Performance of Systems (C.4):
{\bf Measurement techniques}",
}
@Article{Rinard:1999:EFG,
author = "Martin C. Rinard",
title = "Effective fine-grain synchronization for automatically
parallelized programs using optimistic synchronization
primitives",
journal = j-TOCS,
volume = "17",
number = "4",
pages = "337--371",
month = nov,
year = "1999",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/1999-17-4/p337-rinard/",
abstract = "This article presents our experience using optimistic
synchronization to implement fine-grain atomic
operations in the context of a parallelizing compiler
for irregular, object-based computations. Our
experience shows that the synchronization requirements
of these programs differ significantly from those of
traditional parallel computations, which use loop nests
to access dense matrices using affine access functions.
In addition to coarse-grain barrier synchronization,
our irregular computations require synchronization
primitives that support efficient fine-grain atomic
operations. The standard implementation mechanism for
atomic operations uses mutual exclusion locks. But the
overhead of acquiring and releasing locks can reduce
the performance. Locks can also consume significant
amounts of memory. Optimistic synchronization
primitives such as {\em load-linked/store
conditional\/} are an attractive alternative. They
require no additional memory and eliminate the use of
heavyweight blocking synchronization constructs. We
evaluate the effectiveness of optimistic
synchronization by comparing experimental results from
two versions of a parallelizing compiler for irregular,
object-based computations. One version generates code
that uses mutual exclusion locks to make operations
execute atomically. The other version uses optimistic
synchronization. We used this compiler to automatically
parallelize three irregular, object-based benchmark
applications of interest to the scientific and
engineering computation community. The presented
experimental results indicate that the use of
optimistic synchronization in this context can
significantly reduce the memory consumption and improve
the overall performance.",
acknowledgement = ack-nhfb,
generalterms = "Algorithms; Experimentation; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "atomic operations; commutativity analysis; optimistic
synchronization; parallel computing; parallelizing
compilers; synchronization",
subject = "Software --- Programming Languages --- Processors
(D.3.4): {\bf Compilers}",
}
@Article{Keleher:2000:HLA,
author = "Peter J. Keleher",
title = "A high-level abstraction of shared accesses",
journal = j-TOCS,
volume = "18",
number = "1",
pages = "1--36",
month = feb,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-1/p1-keleher/",
abstract = "We describe the design and use of the {\em tape\/}
mechanism, a new high-level abstraction of accesses to
shared data for software DSMs. Tapes consolidate and
generalize a number of recent protocol optimizations,
including update-based locks and recorded-replay
barriers. Tapes are usually created by ``recording''
shared accesses. The resulting recordings can be used
to anticipate future accesses by tailoring data
movement to application semantics. Tapes-based
mechanisms are layered on top of existing shared-memory
protocols, and are largely independent of the
underlying memory model. Tapes can also be used to
emulate the data-movement semantics of several
update-based protocol implementations, without altering
the underlying protocol implementation. We have used
tapes to create the Tapeworm synchronization library.
Tapeworm implements sophisticated record-replay
mechanisms across barriers, augments locks with
data-movement semantics, and allows the use of
producer-consumer segments, which move entire modified
segments when any portion of the segment is accessed.
We show that Tapeworm eliminates 85\% of remote misses,
reduces message traffic by 63\%, and improves
performance by an average of 29\% for our application
suite.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "DSM; programming libraries; shared memory; update
protocols",
subject = "Software --- Operating Systems --- Storage Management
(D.4.2); Software --- Operating Systems --- File
Systems Management (D.4.3); Software --- Operating
Systems --- File Systems Management (D.4.3): {\bf
Access methods}; Software --- Operating Systems ---
File Systems Management (D.4.3): {\bf Distributed file
systems}",
}
@Article{Pai:2000:ILU,
author = "Vivek S. Pai and Peter Druschel and Willy Zwaenepoel",
title = "{IO-Lite}: a unified {I/O} buffering and caching
system",
journal = j-TOCS,
volume = "18",
number = "1",
pages = "37--66",
month = feb,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-1/p37-pai/",
abstract = "This article presents the design, implementation, and
evaluation of IO-Lite, a unified I/O buffering and
caching system for general-purpose operating systems.
IO-Lite unifies {\em all\/} buffering and caching in
the system, to the extent permitted by the hardware. In
particular, it allows applications, the interprocess
communication system, the file system, the file cache,
and the network subsystem to safely and concurrently
share a single physical copy of the data. Protection
and security are maintained through a combination of
access control and read-only sharing. IO-Lite
eliminates all copying and multiple buffering of I/O
data, and enables various cross-subsystem
optimizations. Experiments with a Web server show
performance improvements between 40 and 80\% on real
workloads as a result of IO-Lite.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "caching; I/O buffering; networking; zero-copy",
subject = "Software --- Operating Systems --- Communications
Management (D.4.4); Software --- Operating Systems ---
Performance (D.4.8)",
}
@Article{Schwartz:2000:SPA,
author = "Beverly Schwartz and Alden W. Jackson and W. Timothy
Strayer and Wenyi Zhou and R. Dennis Rockwell and Craig
Partridge",
title = "{Smart Packets}: applying active networks to network
management",
journal = j-TOCS,
volume = "18",
number = "1",
pages = "67--88",
month = feb,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-1/p67-schwartz/",
abstract = "This article introduces Smart Packets and describes
the Smart Packets architecture, the packet formats, the
language and its design goals, and security
considerations. Smart Packets is an Active Networks
project focusing on applying active networks technology
to network management and monitoring. Messages in
active networks are programs that are executed at nodes
on the path to one or more target hosts. Smart Packets
programs are written in a tightly encoded, safe
language specifically designed to support network
management and avoid dangerous constructs and accesses.
Smart Packets improves the management of large complex
networks by (1) moving management decision points
closer to the node being managed, (2) targeting
specific aspects of the node for information rather
than exhaustive collection via polling, and (3)
abstracting the management concepts to language
constructs, allowing nimble network control.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "active networks",
subject = "Computer Systems Organization ---
Computer-Communication Networks --- Network
Architecture and Design (C.2.1); Computer Systems
Organization --- Computer-Communication Networks ---
Network Operations (C.2.3); Software --- Programming
Languages --- Language Constructs and Features
(D.3.3)",
}
@Article{Brooks:2000:VBC,
author = "David Brooks and Margaret Martonosi",
title = "Value-based clock gating and operation packing:
dynamic strategies for improving processor power and
performance",
journal = j-TOCS,
volume = "18",
number = "2",
pages = "89--126",
month = may,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-2/p89-brooks/",
abstract = "The large address space needs of many current
applications have pushed processor designs toward
64-bit word widths. Although full 64-bit addresses and
operations are indeed sometimes needed, arithmetic
operations on much smaller quantities are still more
common. In fact, another instruction set trend has been
the introduction of instructions geared toward subword
operations on 16-bit quantities. For example, most
major processors now include instruction set support
for multimedia operations allowing parallel execution
of several subword operations in the same ALU. This
article presents our observations demonstrating that
operations on ``narrow-width'' quantities are common
not only in multimedia codes, but also in more general
workloads. In fact, across the SPECint95 benchmarks,
over half the integer operation executions require 16
bits or less. Based on this data, we propose two
hardware mechanisms that dynamically recognize and
capitalize on these narrow-width operations. The first,
power-oriented optimization reduces processor power
consumption by using operand-value-based clock gating
to turn off portions of arithmetic units that will be
unused by narrow-width operations. This optimization
results in a 45\%--60\% reduction in the integer unit's
power consumption for the SPECint95 and MediaBench
benchmark suites. Applying this optimization to
SPECfp95 benchmarks results in slightly smaller power
reductions, but still seems warranted. These reductions
in integer unit power consumption equate to a 5\%--10\%
full-chip power savings. Our second,
performance-oriented optimization improves processor
performance by packing together narrow-width operations
so that they share a single arithmetic unit.
Conceptually similar to a dynamic form of MMX, this
optimization offers speedups of 4.3\%--6.2\% for
SPECint95 and 8.0\%--10.4\% for MediaBench.
\par
Overall, these optimizations highlight an increasing
opportunity for value-based optimizations to improve
both power and performance in current
microprocessors.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
subject = "Hardware --- Arithmetic and Logic Structures (B.2);
Computer Systems Organization --- Processor
Architectures --- Single Data Stream Architectures
(C.1.1): {\bf RISC/CISC, VLIW architectures}",
}
@Article{Ganger:2000:SUS,
author = "Gregory R. Ganger and Marshall Kirk McKusick and Craig
A. N. Soules and Yale N. Patt",
title = "Soft updates: a solution to the metadata update
problem in file systems",
journal = j-TOCS,
volume = "18",
number = "2",
pages = "127--153",
month = may,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-2/p127-ganger/",
abstract = "Metadata updates, such as file creation and block
allocation, have consistently been identified as a
source of performance, integrity, security, and
availability problems for file systems. Soft updates
is an implementation technique for low-cost sequencing
of fine-grained updates to write-back cache blocks.
Using soft updates to track and enforce metadata update
dependencies, a file system can safely use delayed
writes for almost all file operations. This article
describes soft updates, their incorporation into the
4.4BSD fast file system, and the resulting effects on
the system. We show that a disk-based file system using
soft updates achieves memory-based file system
performance while providing stronger integrity and
security guarantees than most disk-based file systems.
For workloads that frequently perform updates on
metadata (such as creating and deleting files), this
improves performance by more than a factor of two, a
factor of 20 when compared to the conventional
synchronous write approach, and by 4--19\% when
compared to an aggressive write-ahead logging approach.
In addition, soft updates can improve file system
availability by relegating crash-recovery assistance
(e.g., the {\em fsck\/} utility) to an optional and
background role, reducing file system recovery time to
less than one second.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
subject = "Computer Systems Organization --- Performance of
Systems (C.4): {\bf Design studies}; Computer Systems
Organization --- Performance of Systems (C.4): {\bf
Reliability, availability, and serviceability};
Computer Systems Organization --- Computer System
Implementation --- Servers (C.5.5); Software ---
Operating Systems --- Storage Management (D.4.2);
Software --- Operating Systems --- File Systems
Management (D.4.3); Data --- Files (E.5); Information
Systems --- Information Storage and Retrieval ---
Information Storage (H.3.2)",
}
@Article{Yeung:2000:MSM,
author = "Donald Yeung and John Kubiatowicz and Anant Agarwal",
title = "Multigrain shared memory",
journal = j-TOCS,
volume = "18",
number = "2",
pages = "154--196",
month = may,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Sep 26 07:54:31 MDT 2000",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-2/p154-yeung/",
abstract = "Parallel workstations, each comprising tens of
processors based on shared memory, promise
cost-effective scalable multiprocessing. This article
explores the coupling of such small- to medium-scale
shared-memory multiprocessors through software over a
local area network to synthesize larger shared-memory
systems. We call these systems Distributed
Shared-memory MultiProcessors (DSMPs). This article
introduces the design of a shared-memory system that
uses multiple granularities of sharing, called MGS, and
presents a prototype implementation of MGS on the MIT
Alewife multiprocessor. Multigrain shared memory
enables the collaboration of hardware and software
shared memory, thus synthesizing a single transparent
shared-memory address space across a cluster of
multiprocessors. The system leverages the efficient
support for fine-grain cache-line sharing within
multiprocessor nodes as often as possible, and resorts
to coarse-grain page-level sharing across nodes only
when absolutely necessary. Using our prototype
implementation of MGS, an in-depth study of several
shared-memory applications is conducted to understand
the behavior of DSMPs. Our study is the first to
comprehensively explore the DSMP design space, and to
compare the performance of DSMPs against all-software
and all-hardware DSMs on a single experimental
platform. Keeping the total number of processors fixed,
we show that applications execute up to 85\% faster on
a DSMP as compared to an all-software DSM. We also show
that all-hardware DSMs hold a significant performance
advantage over DSMPs on challenging applications,
between 159\% and 1014\%. However, program
transformations to improve data locality for these
applications allow DSMPs to almost match the
performance of an all-hardware multiprocessor of the
same size.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
subject = "Hardware --- Memory Structures --- Design Styles
(B.3.2): {\bf Shared memory}; Computer Systems
Organization --- Processor Architectures --- Multiple
Data Stream Architectures (Multiprocessors) (C.1.2)",
}
@Article{Aron:2000:STE,
author = "Mohit Aron and Peter Druschel",
title = "Soft timers: efficient microsecond software timer
support for network processing",
journal = j-TOCS,
volume = "18",
number = "3",
pages = "197--228",
month = aug,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/articles/journals/tocs/2000-18-3/p197-aron/p197-aron.pdf;
http://www.acm.org/pubs/citations/journals/tocs/2000-18-3/p197-aron/",
abstract = "This paper proposes and evaluates soft timers, a new
operating system facility that allows the efficient
scheduling of software events at a granularity down to
tens of microseconds. Soft timers can be used to avoid
interrupts and reduce context switches associated with
network processing, without sacrificing low
communication delays. More specifically, soft timers
enable transport protocols like TCP to efficiently
perform rate-based clocking of packet transmissions.
Experiments indicate that soft timers allow a server to
employ rate-based clocking with little CPU overhead
(2--6\%) at high aggregate bandwidths. Soft timers can
also be used to perform network polling, which
eliminates network interrupts and increases the memory
access locality of the network subsystem without
sacrificing delay. Experiments show that this technique
can improve the throughput of a Web server by up to
25\%.",
acknowledgement = ack-nhfb,
generalterms = "Design; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "polling; timers; transmission scheduling",
subject = "Computer Systems Organization --- Computer System
Implementation --- Servers (C.5.5); Software ---
Operating Systems --- Process Management (D.4.1): {\bf
Scheduling}; Software --- Operating Systems ---
Communications Management (D.4.4): {\bf Network
communication}",
}
@Article{Govil:2000:CDR,
author = "Kingshuk Govil and Dan Teodosiu and Yongqiang Huang
and Mendel Rosenblum",
title = "{Cellular Disco}: resource management using virtual
clusters on shared-memory multiprocessors",
journal = j-TOCS,
volume = "18",
number = "3",
pages = "229--262",
month = aug,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Nov 13 18:22:48 MST 2000",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-3/p229-govil/",
abstract = "Despite the fact that large-scale shared-memory
multiprocessors have been commercially available for
several years, system software that fully utilizes all
their features is still not available, mostly due to
the complexity and cost of making the required changes
to the operating system. A recently proposed approach,
called Disco, substantially reduces this development
cost by using a virtual machine monitor that leverages
the existing operating system technology. In this paper
we present a system called Cellular Disco that extends
the Disco work to provide all the advantages of the
hardware partitioning and scalable operating system
approaches. We argue that Cellular Disco can achieve
these benefits at only a small fraction of the
development cost of modifying the operating system.
Cellular Disco effectively turns a large-scale
shared-memory multiprocessor into a virtual cluster
that supports fault containment and heterogeneity,
while avoiding operating system scalability
bottlenecks. Yet at the same time, Cellular Disco
preserves the benefits of a shared-memory
multiprocessor by implementing dynamic, fine-grained
resource sharing, and by allowing users to overcommit
resources such as processors and memory. This hybrid
approach requires a scalable resource manager that
makes local decisions with limited information while
still providing good global performance and fault
containment. In this paper we describe our experience
with a Cellular Disco prototype on a 32-processor SGI
Origin 2000 system. We show that the execution time
penalty for this approach is low, typically within 10\%
of the best available commercial operating system
for most workloads, and that it can manage the CPU and
memory resources of the machine significantly better
than the hardware partitioning approach.",
acknowledgement = ack-nhfb,
generalterms = "Design; Management; Performance; Reliability",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "fault containment; resource management; scalable
multiprocessors; virtual machines",
subject = "Software --- Operating Systems --- Process Management
(D.4.1); Software --- Operating Systems --- Storage
Management (D.4.2); Software --- Operating Systems ---
Reliability (D.4.5); Computer Systems Organization ---
Processor Architectures (C.1)",
}
@Article{Kohler:2000:CMR,
author = "Eddie Kohler and Robert Morris and Benjie Chen and
John Jannotti and M. Frans Kaashoek",
title = "The {Click} modular router",
journal = j-TOCS,
volume = "18",
number = "3",
pages = "263--297",
month = aug,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Nov 13 18:22:48 MST 2000",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-3/p263-kohler/",
abstract = "Click is a new software architecture for building
flexible and configurable routers. A Click router is
assembled from packet processing modules called {\em
elements}. Individual elements implement simple router
functions like packet classification, queuing,
scheduling, and interfacing with network devices. A
router configuration is a directed graph with elements
at the vertices; packets flow along the edges of the
graph. Several features make individual elements more
powerful and complex configurations easier to write,
including {\em pull connections}, which model packet
flow driven by transmitting hardware devices, and {\em
flow-based router context}, which helps an element
locate other interesting elements. Click configurations
are modular and easy to extend. A standards-compliant
Click IP router has 16 elements on its forwarding path;
some of its elements are also useful in Ethernet
switches and IP tunnelling configurations. Extending
the IP router to support dropping policies, fairness
among flows, or Differentiated Services simply requires
adding a couple of elements at the right place. On
conventional PC hardware, the Click IP router achieves
a maximum loss-free forwarding rate of 333,000 64-byte
packets per second, demonstrating that Click's modular
and flexible architecture is compatible with good
performance.",
acknowledgement = ack-nhfb,
generalterms = "Design; Management; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "component systems; routers; software router
performance",
subject = "Computer Systems Organization ---
Computer-Communication Networks --- Network
Architecture and Design (C.2.1): {\bf Packet-switching
networks}; Computer Systems Organization ---
Computer-Communication Networks --- Internetworking
(C.2.6): {\bf Routers}; Software --- Software
Engineering --- Software Architectures (D.2.11): {\bf
Domain-specific architectures}",
}
@Article{Saito:2000:MAP,
author = "Yasushi Saito and Brian N. Bershad and Henry M. Levy",
title = "Manageability, availability, and performance in
{Porcupine}: a highly scalable, cluster-based mail
service",
journal = j-TOCS,
volume = "18",
number = "3",
pages = "298--332",
month = aug,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Nov 13 18:22:48 MST 2000",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2000-18-3/p298-saito/",
abstract = "This paper describes the motivation, design and
performance of Porcupine, a scalable mail server. The
goal of Porcupine is to provide a highly available and
scalable electronic mail service using a large cluster
of commodity PCs. We designed Porcupine to be easy to
manage by emphasizing dynamic load balancing, automatic
configuration, and graceful degradation in the presence
of failures. Key to the system's manageability,
availability, and performance is that sessions, data,
and underlying services are distributed homogeneously
and dynamically across nodes in a cluster.",
acknowledgement = ack-nhfb,
generalterms = "Algorithms; Management; Performance; Reliability",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "cluster; distributed systems; email; group membership
protocol; load balancing; replication",
subject = "Computer Systems Organization ---
Computer-Communication Networks --- Distributed Systems
(C.2.4): {\bf Distributed applications}; Computer
Systems Organization --- Performance of Systems (C.4):
{\bf Reliability, availability, and serviceability};
Computer Systems Organization --- Computer System
Implementation --- Servers (C.5.5); Software ---
Operating Systems --- Reliability (D.4.5): {\bf
Fault-tolerance}; Information Systems --- Information
Storage and Retrieval --- Systems and Software (H.3.4):
{\bf Distributed systems}; Information Systems ---
Information Systems Applications --- Communications
Applications (H.4.3): {\bf Electronic mail}",
}
@Article{Gontmakher:2000:JCN,
author = "Alex Gontmakher and Assaf Schuster",
title = "{Java} consistency: nonoperational characterizations
for {Java} memory behavior",
journal = j-TOCS,
volume = "18",
number = "4",
pages = "333--386",
month = nov,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/articles/journals/tocs/2000-18-4/p333-gontmakher/p333-gontmakher.pdf;
http://www.acm.org/pubs/citations/journals/tocs/2000-18-4/p333-gontmakher/",
abstract = "The Java Language Specification (JLS) [Gosling et al.
1996] provides an operational definition for the
consistency of shared variables. The definition remains
unchanged in the JLS 2nd edition, currently under peer
review, which relies on a specific abstract machine as
its underlying model, is very complicated. Several
subsequent works have tried to simplify and formalize
it. However, these revised definitions are also
operational, and thus have failed to highlight the
intuition behind the original specification. In this
work we provide a complete nonoperational specification
for Java and for the JVM, excluding synchronized
operations. We provide a simpler definition, in which
we clearly distinguish the consistency model that is
promised to the programmer from that which should be
implemented in the JVM. This distinction, which was
implicit in the original definition, is crucial for
building the JVM. We find that the programmer model is
strictly weaker than that of the JVM, and precisely
define their discrepancy. Moreover, our definition is
independent of any specific (or even abstract) machine,
and can thus be used to verify JVM implementations and
compiler optimizations on any platform. Finally, we
show the precise range of consistency relaxations
obtainable for the Java memory model when a certain
compiler optimization---called {\em prescient stores\/}
in JLS---is applicable.",
acknowledgement = ack-nhfb,
generalterms = "Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "Java memory models; multithreading; nonoperational
specification",
subject = "Hardware --- Memory Structures --- Performance
Analysis and Design Aids** (B.3.3): {\bf Formal
models**}",
}
@Article{Sarkar:2000:HBC,
author = "Prasenjit Sarkar and John H. Hartman",
title = "Hint-based cooperative caching",
journal = j-TOCS,
volume = "18",
number = "4",
pages = "387--419",
month = nov,
year = "2000",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/articles/journals/tocs/2000-18-4/p387-sarkar/p387-sarkar.pdf;
http://www.acm.org/pubs/citations/journals/tocs/2000-18-4/p387-sarkar/",
abstract = "This article presents the design, implementation, and
measurement of a hint-based cooperative caching file
system. Hints allow clients to make decisions based on
local state, enabling a loosely coordinated system that
is simple to implement. The resulting performance is
comparable to that of existing tightly coordinated
algorithms that use global state, but with less
overhead. Simulations show that the block access times
of our system are as good as those of the existing
algorithms, while reducing manager load by more than a
factor of seven, block lookup traffic by nearly a
factor of two-thirds, and replacement traffic a factor
of five. To verify our simulation results in a real
system with real users, we implemented a prototype and
measured its performance for one week. Although the
simulation and prototype environments were very
different, the prototype system mirrored the simulation
results by exhibiting reduced overhead and high hint
accuracy. Furthermore, hint-based cooperative caching
reduced the average block access time to almost half
that of NFS.",
acknowledgement = ack-nhfb,
generalterms = "Algorithms; Design; Measurement; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "cooperative caching; hints",
subject = "Software --- Operating Systems --- File Systems
Management (D.4.3)",
}
@Article{Bilas:2001:ASV,
author = "Angelos Bilas and Dongming Jiang and Jaswinder Pal
Singh",
title = "Accelerating shared virtual memory via general-purpose
network interface support",
journal = j-TOCS,
volume = "19",
number = "1",
pages = "1--35",
month = feb,
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-1/p1-bilas/p1-bilas.pdf;
http://www.acm.org/pubs/citations/journals/tocs/2001-19-1/p1-bilas/",
abstract = "Clusters of symmetric multiprocessors (SMPs) are
important platforms for high-performance computing.
With the success of hardware cache-coherent distributed
shared memory (DSM), a lot of effort has also been made
to support the coherent shared-address-space
programming model in software on clusters. Much
research has been done in fast communication on
clusters and in protocols for supporting software
shared memory across them. However, the performance of
software virtual memory (SVM) is still far from that
achieved on hardware DSM systems. The goal of this
paper is to improve the performance of SVM on system
area network clusters by considering communication and
protocol layer interactions. We first examine what are
the important communication system bottlenecks that
stand in the way of improving parallel performance of
SVM clusters; in particular, which parameters of the
communication architecture are most important to
improve further relative to processor speed, which ones
are already adequate on modern systems for most
applications, and how will this change with technology
in the future. We find that the most important
communication subsystem cost to improve is the overhead
of generating and delivering interrupts for asynchronous
protocol processing. Then we proceed to show, that by
providing simple and general support for asynchronous
message handling in a commodity network interface (NI)
and by altering SVM protocols appropriately, protocol
activity can be decoupled from asynchronous message
handling, and the need for interrupts or polling can be
eliminated. The NI mechanisms needed are generic, not
SVM-dependent. We prototype the mechanisms and such a
{\em synchronous home-based LRC\/} protocol, called
{\em GeNIMA\/} (GEneral-purpose Network Interface
support for shared Memory Abstractions), on a cluster
of SMPs with a programmable NI. We find that the
performance improvements are substantial, bringing
performance on a small-scale SMP cluster much closer to
that of hardware-coherent shared memory for many
applications, and we show the value of each of the
mechanisms in different applications.",
acknowledgement = ack-nhfb,
generalterms = "Design; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "applications; clusters; shared virtual memory; system
area networks",
subject = "Computer Systems Organization --- Performance of
Systems (C.4)",
}
@Article{Grimm:2001:SAC,
author = "Robert Grimm and Brian N. Bershad",
title = "Separating access control policy, enforcement, and
functionality in extensible systems",
journal = j-TOCS,
volume = "19",
number = "1",
pages = "36--70",
month = feb,
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-1/p36-grimm/p36-grimm.pdf;
http://www.acm.org/pubs/citations/journals/tocs/2001-19-1/p36-grimm/",
abstract = "Extensible systems, such as Java or the SPIN
extensible operating system, allow for units of code,
or extensions, to be added to a running system in
almost arbitrary fashion. Extensions closely interact
through low-latency but type-safe interfaces to form a
tightly integrated system. As extensions can come from
arbitrary sources, not all of whom can be trusted to
conform to an organization's security policy, such
structuring raises the question of how security
constraints are enforced in an extensible system. In
this paper, we present an access control mechanism for
extensible systems to address this problem. Our access
control mechanism decomposes access control into a
policy-neutral enforcement manager and a security
policy manager, and it is transparent to extensions in
the absence of security violations. It structures the
system into protection domains, enforces protection
domains through access control checks, and performs
auditing of system operations. The access control
mechanism works by inspecting extensions for their
types and operations to determine which abstractions
require protection and by redirecting procedure or
method invocations to inject access control operations
into the system. We describe the design of this access
control mechanism, present an implementation within the
SPIN extensible operating system, and provide a
qualitative as well as quantitative evaluation of the
mechanism.",
acknowledgement = ack-nhfb,
generalterms = "Security",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "access check; auditing; extensible systems; Java;
policy-neutral enforcement; protection domain;
protection domain transfer; security policy; SPIN",
subject = "Software --- Operating Systems (D.4); Software ---
Operating Systems --- General (D.4.0); Software ---
Operating Systems --- Security and Protection (D.4.6):
{\bf Access controls}",
}
@Article{Luk:2001:ACS,
author = "Chi-Keung Luk and Todd C. Mowry",
title = "Architectural and compiler support for effective
instruction prefetching: a cooperative approach",
journal = j-TOCS,
volume = "19",
number = "1",
pages = "71--109",
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-1/p71-luk/p71-luk.pdf;
http://www.acm.org/pubs/citations/journals/tocs/2001-19-1/p71-luk/",
abstract = "Instruction cache miss latency is becoming an
increasingly important performance bottleneck,
especially for commercial applications. Although
instruction prefetching is an attractive technique for
tolerating this latency, we find that existing
prefetching schemes are insufficient for modern
superscalar processors, since they fail to issue
prefetches early enough (particularly for nonsequential
accesses). To overcome these limitations, we propose a
new instruction prefetching technique whereby the
hardware and software {\em cooperate\/} to hide the
latency as follows. The hardware performs aggressive
sequential prefetching combined with a novel {\em
prefetch filtering\/} mechanism to allow it to get far
ahead without polluting the cache. To hide the latency
of nonsequential accesses, we propose and implement a
novel compiler algorithm which automatically inserts
{\em instruction-prefetch instructions\/} to prefetch the targets of
control transfers far enough in advance. Our
experimental results demonstrate that this new approach
hides 50\% or more of the latency remaining with the
best previous techniques, while at the same time
reduces the number of useless prefetches by a factor of
six. We find that both the {\em prefetch filtering\/}
and {\em compiler-inserted prefetching\/} components of
our design are essential and complementary, and that
the compiler can limit the code expansion to only 9\%
on average. In addition, we show that the performance
of our technique can be further increased by using
profiling information to help reduce cache conflicts
and unnecessary prefetches. From an architectural
perspective, these performance advantages are sustained
over a range of common miss latencies and bandwidth.
Finally, our technique is cost effective as well, since
it delivers performance comparable to (or even better
than) that of larger caches, but requires a much
smaller hardware budget.",
acknowledgement = ack-nhfb,
generalterms = "Design; Experimentation; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "compiler optimization; instruction prefetching",
subject = "Software --- Programming Languages --- Processors
(D.3.4): {\bf Compilers}; Software --- Programming
Languages --- Processors (D.3.4): {\bf Optimization};
Hardware --- Memory Structures --- Design Styles
(B.3.2): {\bf Cache memories}",
}
@Article{Brown:2001:CBP,
author = "Angela Demke Brown and Todd C. Mowry and Orran
Krieger",
title = "Compiler-based {I/O} prefetching for out-of-core
applications",
journal = j-TOCS,
volume = "19",
number = "2",
pages = "111--170",
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-2/p111-brown/p111-brown.pdf;
http://www.acm.org/pubs/citations/journals/tocs/2001-19-2/p111-brown/",
abstract = "Current operating systems offer poor performance when
a numeric application's working set does not fit in
main memory. As a result, programmers who wish to solve
``out-of-core'' problems efficiently are typically
faced with the onerous task of rewriting an application
to use explicit I/O operations (e.g., read/write). In
this paper, we propose and evaluate a fully automatic
technique which liberates the programmer from this
task, provides high performance, and requires only
minimal changes to current operating systems. In our
scheme the compiler provides the crucial information on
future access patterns without burdening the
programmer; the operating system supports nonbinding
{\em prefetch\/} and {\em release\/} hints for managing
I/O; and the operating system cooperates with a
run-time layer to accelerate performance by adapting to
dynamic behavior and minimizing prefetch overhead. This
approach maintains the abstraction of unlimited virtual
memory for the programmer, gives the compiler the
flexibility to aggressively insert prefetches ahead of
references, and gives the operating system the
flexibility to arbitrate between the competing resource
demands of multiple applications. We implemented our
compiler analysis within the SUIF compiler, and used it
to target implementations of our run-time and OS
support on both research and commercial systems
(Hurricane and IRIX 6.5, respectively). Our
experimental results show large performance gains for
out-of-core scientific applications on both systems:
more than 50\% of the I/O stall time has been eliminated
in most cases, thus translating into overall speedups
of roughly twofold in many cases.",
acknowledgement = ack-nhfb,
generalterms = "Design; Experimentation; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "compiler optimization; prefetching; virtual memory",
subject = "Software --- Operating Systems --- Storage Management
(D.4.2): {\bf Virtual memory}; Software --- Operating
Systems --- Performance (D.4.8); Software ---
Programming Languages --- Processors (D.3.4): {\bf
Compilers}; Software --- Programming Languages ---
Processors (D.3.4): {\bf Optimization}",
}
@Article{Fekete:2001:SUP,
author = "Alan Fekete and Nancy Lynch and Alex Shvartsman",
title = "Specifying and using a partitionable group
communication service",
journal = j-TOCS,
volume = "19",
number = "2",
pages = "171--216",
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-2/p171-fekete/p171-fekete.pdf;
http://www.acm.org/pubs/citations/journals/tocs/2001-19-2/p171-fekete/",
abstract = "Group communication services are becoming accepted as
effective building blocks for the construction of
fault-tolerant distributed applications. Many
specifications for group communication services have
been proposed. However, there is still no agreement
about what these specifications should say, especially
in cases where the services are {\em partitionable},
i.e., where communication failures may lead to
simultaneous creation of groups with disjoint
memberships, such that each group is unaware of the
existence of any other group. In this paper, we present
a new, succinct specification for a view-oriented
partitionable group communication service. The service
associates each message with a particular {\em view\/}
of the group membership. All send and receive events
for a message occur within the associated view. The
service provides a total order on the messages within
each view, and each processor receives a prefix of this
order. Our specification separates safety requirements
from performance and fault-tolerance requirements. The
safety requirements are expressed by an abstract,
global {\em state machine}. To present the performance
and fault-tolerance requirements, we include {\em
failure-status input actions\/} in the specification;
we then give properties saying that consensus on the
view and timely message delivery are guaranteed in an
execution provided that the execution {\em
stabilizes\/} to a situation in which the
failure-status stops changing and corresponds to a
consistently partitioned system. Because consensus is not
required in every execution, the specification is not
subject to the existing impossibility results for
partitionable systems. Our specification has a simple
implementation, based on the membership algorithm of
Cristian and Schmuck. We show the utility of the
specification by constructing an ordered-broadcast
application, using an algorithm (based on algorithms of
Amir, Dolev, Keidar, and others) that reconciles
information derived from different instantiations of
the group. The application manages the view-change
activity to build a shared sequence of messages, i.e.,
the per-view total orders of the group service are
combined to give a universal total order. We prove the
correctness and analyze the performance and
fault-tolerance of the resulting application.",
acknowledgement = ack-nhfb,
generalterms = "Algorithms; Design; Performance; Verification",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "composable building blocks; conditional performance
analysis; distributed algorithms; group communication
protocols; message-passing protocols; ordered
broadcast; service specification; total-order
broadcast",
subject = "Computer Systems Organization ---
Computer-Communication Networks --- Distributed Systems
(C.2.4); Software --- Operating Systems --- Reliability
(D.4.5): {\bf Fault-tolerance}; Software --- Software
Engineering --- Software/Program Verification (D.2.4):
{\bf Correctness proofs}",
}
@Article{McNamee:2001:STT,
author = "Dylan McNamee and Jonathan Walpole and Calton Pu and
Crispin Cowan and Charles Krasic and Ashvin Goel and
Perry Wagle and Charles Consel and Gilles Muller and
Renauld Marlet",
title = "Specialization tools and techniques for systematic
optimization of system software",
journal = j-TOCS,
volume = "19",
number = "2",
pages = "217--251",
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-2/p217-mcnamee/p217-mcnamee.pdf;
http://www.acm.org/pubs/citations/journals/tocs/2001-19-2/p217-mcnamee/",
abstract = "Specialization has been recognized as a powerful
technique for optimizing operating systems. However,
specialization has not been broadly applied beyond the
research community because current techniques, based on
manual specialization, are time-consuming and
error-prone. The goal of the work described in this
paper is to help operating system tuners perform
specialization more easily. We have built a
specialization toolkit that assists the major tasks of
specializing operating systems. We demonstrate the
effectiveness of the toolkit by applying it to three
diverse operating system components. We show that using
tools to assist specialization enables significant
performance optimizations without error-prone manual
modifications. Our experience with the toolkit suggests
new ways of designing systems that combine high
performance and clean structure.",
acknowledgement = ack-nhfb,
generalterms = "Design; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "operating system specialization; optimization;
software architecture",
subject = "Software --- Operating Systems --- Organization and
Design (D.4.7)",
}
@Article{Mendelson:2001:ESC,
author = "Avi Mendelson and Freddy Gabbay",
title = "The effect of seance communication on multiprocessing
systems",
journal = j-TOCS,
volume = "19",
number = "2",
pages = "252--281",
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/articles/journals/tocs/2001-19-2/p252-mendelson/p252-mendelson.pdf;
http://www.acm.org/pubs/citations/journals/tocs/2001-19-2/p252-mendelson/",
abstract = "This paper introduces the seance communication
phenomenon and analyzes its effect on a multiprocessing
environment. Seance communication is an unnecessary
coherency-related activity that is associated with dead
cache information. Dead information may reside in the
cache for various reasons: task migration, context
switches, or working-set changes. Dead information does
not have a significant performance impact on a
single-processor system; however, it can dominate the
performance of a multicache environment. In order to
evaluate the overhead of seance communication, we
develop an analytical model that is based on the
fractal behavior of the memory references. So far, all
previous works that used the same modeling approach
extracted the fractal parameters of a program manually.
This paper provides an additional important
contribution by demonstrating how these parameters can
be automatically extracted from the program trace. Our
analysis indicates that seance communication may
severely reduce the overall system performance when
using write-update or write-invalidate cache coherency
protocols. In addition, we find that the performance of
write-update protocols is affected more severely than
write-invalidate protocols. The results that are
provided by our model are important for better
understanding of the coherency-related overhead in
multicache systems and for better development of
parallel applications and operating systems.",
acknowledgement = ack-nhfb,
generalterms = "Design; Experimentation; Measurement; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "cache coherency protocols; multicache systems;
performance analysis; seance communication",
subject = "Hardware --- Memory Structures (B.3); Hardware ---
Memory Structures --- Design Styles (B.3.2): {\bf Cache
memories}; Computer Systems Organization --- General
(C.0); Computer Systems Organization --- Processor
Architectures --- Multiple Data Stream Architectures
(Multiprocessors) (C.1.2): {\bf Interconnection
architectures}",
}
@Article{Arpaci-Dusseau:2001:ICC,
author = "Andrea Carol Arpaci-Dusseau",
title = "Implicit coscheduling: coordinated scheduling with
implicit information in distributed systems",
journal = j-TOCS,
volume = "19",
number = "3",
pages = "283--331",
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2001-19-3/p283-arpaci-dusseau/",
abstract = "In modern distributed systems, coordinated
time-sharing is required for communicating processes to
leverage the performance of switch-based networks and
low-overhead protocols. Coordinated time-sharing has
traditionally been achieved with gang scheduling or
explicit coscheduling, implementations of which often
suffer from many deficiencies: multiple points of
failure, high context-switch overheads, and poor
interaction with client-server, interactive, and
I/O-intensive workloads. {\em Implicit coscheduling\/}
dynamically coordinates communicating processes across
distributed machines without these structural
deficiencies. In implicit coscheduling, no
communication is required across operating system
schedulers; instead, cooperating processes achieve
coordination by reacting to {\em implicit
information\/} carried by communication existing within
the parallel application. The implementation of this
approach is simple and allows participating nodes to
act autonomously. We introduce two key mechanisms in
implicit coscheduling. The first is {\em conditional
two-phase waiting}, a generalization of traditional
two-phase waiting in which spin-time may be increased
depending upon events occurring while the process
waits. The second is an extension to stride scheduling
that provides preemption and is fair to processes that
block. To demonstrate that implicit coscheduling
performs well, we show results from an extensive
set of simulation and implementation experiments. To
exercise the conditional two-phase waiting algorithm,
we examine three workloads: bulk-synchronous and
continuous-communication synthetic applications and
application kernels written in the Split-C language. To
exercise the local scheduler, we examine competing jobs
with different communication characteristics. We
demonstrate that our implementation scales well with
the number of jobs and workstations and is robust to
process placement. Our experiments show that implicit
coscheduling is effective and fair for a wide range of
workloads; most perform within 30\% of an idealized
model of gang scheduling.",
acknowledgement = ack-nhfb,
generalterms = "Algorithms; Design; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "clusters; coscheduling; gang scheduling; networks of
workstations; proportional-share scheduling; two-phase
waiting",
subject = "Software --- Operating Systems --- Process Management
(D.4.1): {\bf Scheduling}; Computer Systems
Organization --- Computer-Communication Networks ---
Distributed Systems (C.2.4): {\bf Network operating
systems}",
}
@Article{Carzaniga:2001:DEW,
author = "Antonio Carzaniga and David S. Rosenblum and Alexander
L. Wolf",
title = "Design and evaluation of a wide-area event
notification service",
journal = j-TOCS,
volume = "19",
number = "3",
pages = "332--383",
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2001-19-3/p332-carzaniga/",
abstract = "The components of a loosely coupled system are
typically designed to operate by generating and
responding to asynchronous events. An {\em event
notification service\/} is an application-independent
infrastructure that supports the construction of
event-based systems, whereby generators of events
publish event notifications to the infrastructure and
consumers of events subscribe with the infrastructure
to receive relevant notifications. The two primary
services that should be provided to components by the
infrastructure are notification selection (i.e.,
determining which notifications match which
subscriptions) and notification delivery (i.e., routing
matching notifications from publishers to subscribers).
Numerous event notification services have been
developed for local-area networks, generally based on a
centralized server to select and deliver event
notifications. Therefore, they suffer from an inherent
inability to scale to wide-area networks, such as the
Internet, where the number and physical distribution of
the service's clients can quickly overwhelm a
centralized solution. The critical challenge in the
setting of a wide-area network is to maximize the
expressiveness in the selection mechanism without
sacrificing scalability in the delivery mechanism. This
paper presents Siena, an event notification service
that we have designed and implemented to exhibit both
expressiveness and scalability. We describe the
service's interface to applications, the algorithms
used by networks of servers to select and deliver event
notifications, and the strategies used to optimize
performance. We also present results of simulation
studies that examine the scalability and performance of
the service.",
acknowledgement = ack-nhfb,
generalterms = "Algorithms; Experimentation; Performance",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "content-based addressing and routing; event
notification; publish/subscribe",
subject = "Computer Systems Organization ---
Computer-Communication Networks --- Network
Architecture and Design (C.2.1): {\bf Distributed
networks}; Computer Systems Organization ---
Computer-Communication Networks --- Network Protocols
(C.2.2); Computer Systems Organization ---
Computer-Communication Networks --- Distributed Systems
(C.2.4): {\bf Distributed applications}; Computer
Systems Organization --- Computer-Communication
Networks --- Local and Wide-Area Networks (C.2.5): {\bf
Internet}; Computer Systems Organization ---
Computer-Communication Networks --- Internetworking
(C.2.6): {\bf Routers}; Computer Systems Organization
--- Performance of Systems (C.4): {\bf Design studies};
Computing Methodologies --- Simulation and Modeling ---
Applications (I.6.3); Computing Methodologies ---
Simulation and Modeling --- Model Validation and
Analysis (I.6.4); Computing Methodologies ---
Simulation and Modeling --- Types of Simulation
(I.6.8): {\bf Discrete event}",
}
@Article{Maxemchuk:2001:IMS,
author = "N. F. Maxemchuk and D. H. Shur",
title = "An {Internet} multicast system for the stock market",
journal = j-TOCS,
volume = "19",
number = "3",
pages = "384--412",
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jul 18 10:18:45 MDT 2001",
bibsource = "http://www.acm.org/pubs/toc/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "http://www.acm.org/pubs/citations/journals/tocs/2001-19-3/p384-maxemchuk/",
abstract = "We are moving toward an international, 24-hour,
distributed, electronic stock exchange. The exchange
will use the global internet, or Internet, technology.
This system is a natural application of multicast
because there are a large number of receivers that
should receive the same information simultaneously. The
data requirements for the stock exchange are discussed.
The current multicast protocols lack the reliability,
fairness, and scalability needed in this application.
We describe a distributed architecture and a timed
reliable multicast protocol, TRMP, that has the
appropriate characteristics. We consider three
applications: (1) A unified stock ticker of the
transactions that are being conducted on the various
physical and electronic exchanges. Our objective is to
deliver the same combined ticker reliably and
simultaneously to all receivers, anywhere in the world.
(2) A unified sequence of buy and sell offers that are
delivered to a single exchange or a collection of
exchanges. Our objective is to give all traders the
same fair access to an exchange independent of their
relative distances to the exchange or the delay and
loss characteristics of the international network. (3)
A distributed, electronic trading floor that can
replace the current exchanges. This application has the
fairness attributes of the first two applications and
uses TRMP to conduct irrefutable, distributed trades.",
acknowledgement = ack-nhfb,
generalterms = "Design; Performance; Theory",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "multicast",
subject = "Computer Systems Organization ---
Computer-Communication Networks --- Network
Architecture and Design (C.2.1); Computer Systems
Organization --- Computer-Communication Networks ---
Network Protocols (C.2.2); Computer Systems
Organization --- Computer-Communication Networks ---
Distributed Systems (C.2.4)",
}
@Article{Collins:2001:RIC,
author = "Jamison D. Collins and Dean M. Tullsen",
title = "Runtime identification of cache conflict misses: {The}
adaptive miss buffer",
journal = j-TOCS,
volume = "19",
number = "4",
pages = "413--439",
month = nov,
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Feb 19 15:24:55 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Waldvogel:2001:SHS,
author = "Marcel Waldvogel and George Varghese and Jon Turner
and Bernhard Plattner",
title = "Scalable high-speed prefix matching",
journal = j-TOCS,
volume = "19",
number = "4",
pages = "440--482",
month = nov,
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Feb 19 15:24:55 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Alvarez:2001:MAR,
author = "Guillermo A. Alvarez and Elizabeth Borowsky and Susie
Go and Theodore H. Romer and Ralph Becker-Szendy and
Richard Golding and Arif Merchant and Mirjana
Spasojevic and Alistair Veitch and John Wilkes",
title = "{Minerva}: An automated resource provisioning tool
for large-scale storage systems",
journal = j-TOCS,
volume = "19",
number = "4",
pages = "483--518",
month = nov,
year = "2001",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Feb 19 15:24:55 MST 2002",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Fu:2002:FSD,
author = "Kevin Fu and M. Frans Kaashoek and David
Mazi{\`e}res",
title = "Fast and secure distributed read-only file system",
journal = j-TOCS,
volume = "20",
number = "1",
pages = "1--24",
month = feb,
year = "2002",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Aug 7 10:13:23 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Anderson:2002:IRR,
author = "Darrell C. Anderson and Jeffrey S. Chase and Amin M.
Vahdat",
title = "Interposed request routing for scalable network
storage",
journal = j-TOCS,
volume = "20",
number = "1",
pages = "25--48",
month = feb,
year = "2002",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Aug 7 10:13:23 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Ganger:2002:FFA,
author = "Gregory R. Ganger and Dawson R. Engler and M. Frans
Kaashoek and H{\'e}ctor M. Brice{\~n}o and Russell Hunt
and Thomas Pinckney",
title = "Fast and flexible application-level networking on
exokernel systems",
journal = j-TOCS,
volume = "20",
number = "1",
pages = "49--83",
month = feb,
year = "2002",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Aug 7 10:13:23 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Satyanarayanan:2002:EC,
author = "M. Satyanarayanan",
title = "The evolution of {Coda}",
journal = j-TOCS,
volume = "20",
number = "2",
pages = "85--124",
month = may,
year = "2002",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Aug 7 10:13:24 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Burgess:2002:MSN,
author = "Mark Burgess and H{\aa}rek Haugerud and Sigmund
Straumsnes and Trond Reitan",
title = "Measuring system normality",
journal = j-TOCS,
volume = "20",
number = "2",
pages = "125--160",
month = may,
year = "2002",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Aug 7 10:13:24 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Hu:2002:LCD,
author = "Zhigang Hu and Stefanos Kaxiras and Margaret
Martonosi",
title = "Let caches decay: reducing leakage energy via
exploitation of cache generational behavior",
journal = j-TOCS,
volume = "20",
number = "2",
pages = "161--190",
month = may,
year = "2002",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Aug 7 10:13:24 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Keidar:2002:MGM,
author = "Idit Keidar and Jeremy Sussman and Keith Marzullo and
Danny Dolev",
title = "{Moshe}: a group membership service for {WANs}",
journal = j-TOCS,
volume = "20",
number = "3",
pages = "191--238",
month = aug,
year = "2002",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Aug 7 10:17:48 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Yu:2002:DEC,
author = "Haifeng Yu and Amin Vahdat",
title = "Design and evaluation of a conit-based continuous
consistency model for replicated services",
journal = j-TOCS,
volume = "20",
number = "3",
pages = "239--282",
month = aug,
year = "2002",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Aug 7 10:17:48 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "conit (consistency unit)",
}
@Article{Zdancewic:2002:SPP,
author = "Steve Zdancewic and Lantian Zheng and Nathaniel
Nystrom and Andrew C. Myers",
title = "Secure program partitioning",
journal = j-TOCS,
volume = "20",
number = "3",
pages = "283--328",
month = aug,
year = "2002",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Aug 7 10:17:48 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Zhou:2002:CSD,
author = "Lidong Zhou and Fred B. Schneider and Robbert {Van
Renesse}",
title = "{COCA}: a secure distributed online certification
authority",
journal = j-TOCS,
volume = "20",
number = "4",
pages = "329--368",
month = nov,
year = "2002",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Aug 7 10:13:24 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Jimenez:2002:NMD,
author = "Daniel A. Jim{\'e}nez and Calvin Lin",
title = "Neural methods for dynamic branch prediction",
journal = j-TOCS,
volume = "20",
number = "4",
pages = "369--397",
month = nov,
year = "2002",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Aug 7 10:13:24 MDT 2003",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Castro:2002:PBF,
  author =       "Miguel Castro and Barbara Liskov",
  title =        "Practical {Byzantine} fault tolerance and proactive
                 recovery",
  journal =      j-TOCS,
  volume =       "20",
  number =       "4",
  pages =        "398--461",
  month =        nov,
  year =         "2002",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Aug 7 10:13:24 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Hu:2003:RTS,
  author =       "Y. Charlie Hu and Weimin Yu and Alan Cox and Dan
                 Wallach and Willy Zwaenepoel",
  title =        "Run-time support for distributed sharing in safe
                 languages",
  journal =      j-TOCS,
  volume =       "21",
  number =       "1",
  pages =        "1--35",
  month =        feb,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Aug 7 10:21:30 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Arpaci-Dusseau:2003:RTA,
  author =       "Remzi H. Arpaci-Dusseau",
  title =        "Run-time adaptation in {River}",
  journal =      j-TOCS,
  volume =       "21",
  number =       "1",
  pages =        "36--86",
  month =        feb,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Aug 7 10:21:30 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Nieh:2003:MTC,
  author =       "Jason Nieh and S. Jae Yang and Naomi Novik",
  title =        "Measuring thin-client performance using slow-motion
                 benchmarking",
  journal =      j-TOCS,
  volume =       "21",
  number =       "1",
  pages =        "87--115",
  month =        feb,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Aug 7 10:21:30 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Nieh:2003:SSM,
  author =       "Jason Nieh and Monica S. Lam",
  title =        "A {SMART} scheduler for multimedia applications",
  journal =      j-TOCS,
  volume =       "21",
  number =       "2",
  pages =        "117--163",
  month =        may,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Aug 7 10:13:25 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{VanRenesse:2003:ARS,
  author =       "Robbert {Van Renesse} and Kenneth P. Birman and Werner
                 Vogels",
  title =        "{Astrolabe}: a robust and scalable technology for
                 distributed system monitoring, management, and data
                 mining",
  journal =      j-TOCS,
  volume =       "21",
  number =       "2",
  pages =        "164--206",
  month =        may,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Aug 7 10:13:25 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Harchol-Balter:2003:SBS,
  author =       "Mor Harchol-Balter and Bianca Schroeder and Nikhil
                 Bansal and Mukesh Agrawal",
  title =        "Size-based scheduling to improve {Web} performance",
  journal =      j-TOCS,
  volume =       "21",
  number =       "2",
  pages =        "207--233",
  month =        may,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Aug 7 10:13:25 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Ellis:2003:E,
  author =       "Carla Schlatter Ellis",
  title =        "Editorial",
  journal =      j-TOCS,
  volume =       "21",
  number =       "3",
  pages =        "235--235",
  month =        aug,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Aug 7 10:13:26 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Castro:2003:BUA,
  author =       "Miguel Castro and Rodrigo Rodrigues and Barbara
                 Liskov",
  title =        "{BASE}: {Using} abstraction to improve fault
                 tolerance",
  journal =      j-TOCS,
  volume =       "21",
  number =       "3",
  pages =        "236--269",
  month =        aug,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Aug 7 10:13:26 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Estan:2003:NDT,
  author =       "Cristian Estan and George Varghese",
  title =        "New directions in traffic measurement and accounting:
                 {Focusing} on the elephants, ignoring the mice",
  journal =      j-TOCS,
  volume =       "21",
  number =       "3",
  pages =        "270--313",
  month =        aug,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Aug 7 10:13:26 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Swanson:2003:ESI,
  author =       "Steven Swanson and Luke K. McDowell and Michael M.
                 Swift and Susan J. Eggers and Henry M. Levy",
  title =        "An evaluation of speculative instruction execution on
                 simultaneous multithreaded processors",
  journal =      j-TOCS,
  volume =       "21",
  number =       "3",
  pages =        "314--340",
  month =        aug,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Aug 7 10:13:26 MDT 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Eugster:2003:LPB,
  author =       "P. Th. Eugster and R. Guerraoui and S. B. Handurukande
                 and P. Kouznetsov and A.-M. Kermarrec",
  title =        "Lightweight probabilistic broadcast",
  journal =      j-TOCS,
  volume =       "21",
  number =       "4",
  pages =        "341--374",
  month =        nov,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Oct 31 06:17:27 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Hadzic:2003:BPF,
  author =       "Ilija Had{\v{z}}i{\'c} and Jonathan M. Smith",
  title =        "Balancing performance and flexibility with hardware
                 support for network architectures",
  journal =      j-TOCS,
  volume =       "21",
  number =       "4",
  pages =        "375--411",
  month =        nov,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Oct 31 06:17:27 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Annavaram:2003:CGP,
  author =       "Murali Annavaram and Jignesh M. Patel and Edward S.
                 Davidson",
  title =        "Call graph prefetching for database applications",
  journal =      j-TOCS,
  volume =       "21",
  number =       "4",
  pages =        "412--444",
  month =        nov,
  year =         "2003",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Oct 31 06:17:27 MST 2003",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Reumann:2004:SDI,
  author =       "John Reumann and Kang G. Shin",
  title =        "Stateful distributed interposition",
  journal =      j-TOCS,
  volume =       "22",
  number =       "1",
  pages =        "1--48",
  month =        feb,
  year =         "2004",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon Feb 2 14:07:29 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Olshefski:2004:UCI,
  author =       "David Olshefski and Jason Nieh and Dakshi Agrawal",
  title =        "Using {Certes} to infer client response time at the
                 {Web} server",
  journal =      j-TOCS,
  volume =       "22",
  number =       "1",
  pages =        "49--93",
  month =        feb,
  year =         "2004",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon Feb 2 14:07:29 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "Certes (CliEnt Response Time Estimated by the
                 Server)",
}
@Article{Adve:2004:PPP,
  author =       "Vikram S. Adve and Mary K. Vernon",
  title =        "Parallel program performance prediction using
                 deterministic task graph analysis",
  journal =      j-TOCS,
  volume =       "22",
  number =       "1",
  pages =        "94--136",
  month =        feb,
  year =         "2004",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon Feb 2 14:07:29 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Flinn:2004:MBL,
  author =       "Jason Flinn and M. Satyanarayanan",
  title =        "Managing battery lifetime with energy-aware
                 adaptation",
  journal =      j-TOCS,
  volume =       "22",
  number =       "2",
  pages =        "137--179",
  month =        may,
  year =         "2004",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Nov 4 08:16:45 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Ashok:2004:CCE,
  author =       "Raksit Ashok and Saurabh Chheda and Csaba Andras
                 Moritz",
  title =        "Coupling compiler-enabled and conventional memory
                 accessing for energy efficiency",
  journal =      j-TOCS,
  volume =       "22",
  number =       "2",
  pages =        "180--213",
  month =        may,
  year =         "2004",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Nov 4 08:16:45 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Choi:2004:GFP,
  author =       "Seungryul Choi and Nicholas Kohout and Sumit Pamnani
                 and Dongkeun Kim and Donald Yeung",
  title =        "A general framework for prefetch scheduling in linked
                 data structures and its application to multi-chain
                 prefetching",
  journal =      j-TOCS,
  volume =       "22",
  number =       "2",
  pages =        "214--280",
  month =        may,
  year =         "2004",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Nov 4 08:16:45 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Verstoep:2004:CCP,
  author =       "Kees Verstoep and Raoul A. F. Bhoedjang and Tim
                 R{\"u}hl and Henri E. Bal and Rutger F. H. Hofman",
  title =        "Cluster communication protocols for
                 parallel-programming systems",
  journal =      j-TOCS,
  volume =       "22",
  number =       "3",
  pages =        "281--325",
  month =        aug,
  year =         "2004",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Nov 4 08:16:45 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Kim:2004:SSL,
  author =       "Dongkeun Kim and Donald Yeung",
  title =        "A study of source-level compiler algorithms for
                 automatic construction of pre-execution code",
  journal =      j-TOCS,
  volume =       "22",
  number =       "3",
  pages =        "326--379",
  month =        aug,
  year =         "2004",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Nov 4 08:16:45 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Bartal:2004:FNF,
  author =       "Yair Bartal and Alain Mayer and Kobbi Nissim and
                 Avishai Wool",
  title =        "{{\em Firmato\/}}: a novel firewall management
                 toolkit",
  journal =      j-TOCS,
  volume =       "22",
  number =       "4",
  pages =        "381--420",
  month =        nov,
  year =         "2004",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Dec 2 05:29:12 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Grimm:2004:SSP,
  author =       "Robert Grimm and Janet Davis and Eric Lemar and Adam
                 Macbeth and Steven Swanson and Thomas Anderson and
                 Brian Bershad and Gaetano Borriello and Steven Gribble
                 and David Wetherall",
  title =        "System support for pervasive applications",
  journal =      j-TOCS,
  volume =       "22",
  number =       "4",
  pages =        "421--486",
  month =        nov,
  year =         "2004",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Dec 2 05:29:12 MST 2004",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Ellis:2005:E,
  author =       "Carla Schlatter Ellis",
  title =        "Editorial",
  journal =      j-TOCS,
  volume =       "23",
  number =       "1",
  pages =        "1--1",
  month =        feb,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Apr 14 10:29:37 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Maniatis:2005:LPP,
  author =       "Petros Maniatis and Mema Roussopoulos and T. J. Giuli
                 and David S. H. Rosenthal and Mary Baker",
  title =        "The {LOCKSS} peer-to-peer digital preservation
                 system",
  journal =      j-TOCS,
  volume =       "23",
  number =       "1",
  pages =        "2--50",
  month =        feb,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Apr 14 10:29:37 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{King:2005:BI,
  author =       "Samuel T. King and Peter M. Chen",
  title =        "Backtracking intrusions",
  journal =      j-TOCS,
  volume =       "23",
  number =       "1",
  pages =        "51--76",
  month =        feb,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Apr 14 10:29:37 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Swift:2005:IRC,
  author =       "Michael M. Swift and Brian N. Bershad and Henry M.
                 Levy",
  title =        "Improving the reliability of commodity operating
                 systems",
  journal =      j-TOCS,
  volume =       "23",
  number =       "1",
  pages =        "77--110",
  month =        feb,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Apr 14 10:29:37 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Gluhovsky:2005:CMC,
  author =       "Ilya Gluhovsky and Brian O'Krafka",
  title =        "Comprehensive multiprocessor cache miss rate
                 generation using multivariate models",
  journal =      j-TOCS,
  volume =       "23",
  number =       "2",
  pages =        "111--145",
  month =        may,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon May 9 11:20:41 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Herlihy:2005:NMM,
  author =       "Maurice Herlihy and Victor Luchangco and Paul Martin
                 and Mark Moir",
  title =        "Nonblocking memory management support for
                 dynamic-sized data structures",
  journal =      j-TOCS,
  volume =       "23",
  number =       "2",
  pages =        "146--196",
  month =        may,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon May 9 11:20:41 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Jimenez:2005:ILA,
  author =       "Daniel A. Jim{\'e}nez",
  title =        "Improved latency and accuracy for neural branch
                 prediction",
  journal =      j-TOCS,
  volume =       "23",
  number =       "2",
  pages =        "197--218",
  month =        may,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon May 9 11:20:41 MDT 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Jelasity:2005:GBA,
  author =       "M{\'a}rk Jelasity and Alberto Montresor and Ozalp
                 Babaoglu",
  title =        "{Gossip}-based aggregation in large dynamic networks",
  journal =      j-TOCS,
  volume =       "23",
  number =       "3",
  pages =        "219--252",
  month =        aug,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Nov 18 08:19:50 MST 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Steffan:2005:SAT,
  author =       "J. Gregory Steffan and Christopher Colohan and Antonia
                 Zhai and Todd C. Mowry",
  title =        "The {STAMPede} approach to thread-level speculation",
  journal =      j-TOCS,
  volume =       "23",
  number =       "3",
  pages =        "253--300",
  month =        aug,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Nov 18 08:19:50 MST 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Kontothanassis:2005:SMC,
  author =       "Leonidas Kontothanassis and Robert Stets and Galen
                 Hunt and Umit Rencuzogullari and Gautam Altekar and
                 Sandhya Dwarkadas and Michael L. Scott",
  title =        "Shared memory computing on clusters with symmetric
                 multiprocessors and system area networks",
  journal =      j-TOCS,
  volume =       "23",
  number =       "3",
  pages =        "301--335",
  month =        aug,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Nov 18 08:19:50 MST 2005",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Anderson:2005:QFN,
  author =       "Eric Anderson and Susan Spence and Ram Swaminathan and
                 Mahesh Kallahalla and Qian Wang",
  title =        "Quickly finding near-optimal storage designs",
  journal =      j-TOCS,
  volume =       "23",
  number =       "4",
  pages =        "337--374",
  month =        nov,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Feb 4 09:45:56 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Patino-Martinez:2005:MRC,
  author =       "Marta Pati{\~n}o-Martinez and Ricardo
                 Jim{\'e}nez-Peris and Bettina Kemme and Gustavo
                 Alonso",
  title =        "{MIDDLE-R}: {Consistent} database replication at the
                 middleware level",
  journal =      j-TOCS,
  volume =       "23",
  number =       "4",
  pages =        "375--423",
  month =        nov,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Feb 4 09:45:56 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Hsu:2005:AIL,
  author =       "Windsor W. Hsu and Alan Jay Smith and Honesty C.
                 Young",
  title =        "The automatic improvement of locality in storage
                 systems",
  journal =      j-TOCS,
  volume =       "23",
  number =       "4",
  pages =        "424--473",
  month =        nov,
  year =         "2005",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Feb 4 09:45:56 MST 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Keromytis:2006:COS,
  author =       "Angelos D. Keromytis and Jason L. Wright and Theo {De
                 Raadt} and Matthew Burnside",
  title =        "Cryptography as an operating system service: a case
                 study",
  journal =      j-TOCS,
  volume =       "24",
  number =       "1",
  pages =        "1--38",
  month =        feb,
  year =         "2006",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1124153.1124154",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Apr 7 08:15:08 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Cryptographic transformations are a fundamental
                 building block in many security applications and
                 protocols. To improve performance, several vendors
                 market hardware accelerator cards. However, until now
                 no operating system provided a mechanism that allowed
                 both uniform and efficient use of this new type of
                 resource. We present the OpenBSD Cryptographic
                 Framework (OCF), a service virtualization layer
                 implemented inside the operating system kernel, that
                 provides uniform access to accelerator functionality by
                 hiding card-specific details behind a carefully
                 designed API. We evaluate the impact of the OCF in a
                 variety of benchmarks, measuring overall system
                 performance, application throughput and latency, and
                 aggregate throughput when multiple applications make
                 use of it. We conclude that the OCF is extremely
                 efficient in utilizing cryptographic accelerator
                 functionality, attaining 95\% of the theoretical peak
                 device performance and over 800 Mbps aggregate
                 throughput using 3DES. We believe that this validates
                 our decision to opt for ease of use by applications and
                 kernel components through a uniform API and for
                 seamless support for new accelerators. Furthermore, our
                 evaluation points to several bottlenecks in system and
                 operating system design: data copying between user and
                 kernel modes, PCI bus signaling inefficiency, protocols
                 that use small data units, and single-threaded
                 applications. We identify some of these limitations
                 through a set of measurements focusing on
                 application-layer cryptographic protocols such as SSL.
                 We offer several suggestions for improvements and
                 directions for future work. We provide experimental
                 evidence of the effectiveness of a new approach which
                 we call operating system shortcutting. Shortcutting can
                 improve the performance of application-layer
                 cryptographic protocols by 27\% with very small changes
                 to the kernel.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Coarfa:2006:PAT,
  author =       "Cristian Coarfa and Peter Druschel and Dan S.
                 Wallach",
  title =        "Performance analysis of {TLS Web} servers",
  journal =      j-TOCS,
  volume =       "24",
  number =       "1",
  pages =        "39--69",
  month =        feb,
  year =         "2006",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1124153.1124155",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Apr 7 08:15:08 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Yu:2006:CLA,
  author =       "Haifeng Yu and Amin Vahdat",
  title =        "The costs and limits of availability for replicated
                 services",
  journal =      j-TOCS,
  volume =       "24",
  number =       "1",
  pages =        "70--113",
  month =        feb,
  year =         "2006",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1124153.1124156",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Apr 7 08:15:08 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Moore:2006:IID,
  author =       "David Moore and Colleen Shannon and Douglas J. Brown
                 and Geoffrey M. Voelker and Stefan Savage",
  title =        "Inferring {Internet} denial-of-service activity",
  journal =      j-TOCS,
  volume =       "24",
  number =       "2",
  pages =        "115--139",
  month =        may,
  year =         "2006",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1132026.1132027",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu May 18 08:01:47 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "In this article, we seek to address a simple question:
                 ``How prevalent are denial-of-service attacks in the
                 Internet?'' Our motivation is to quantitatively
                 understand the nature of the current threat as well as
                 to enable longer-term analyses of trends and recurring
                 patterns of attacks. We present a new technique, called
                 ``backscatter analysis,'' that provides a conservative
                 estimate of worldwide denial-of-service activity. We
                 use this approach on 22 traces (each covering a week or
                 more) gathered over three years from 2001 through 2004.
                 Across this corpus we quantitatively assess the number,
                 duration, and focus of attacks, and qualitatively
                 characterize their behavior. In total, we observed over
                 68,000 attacks directed at over 34,000 distinct victim
                 IP addresses---ranging from well-known e-commerce
                 companies such as Amazon and Hotmail to small foreign
                 ISPs and dial-up connections. We believe our technique
                 is the first to provide quantitative estimates of
                 Internet-wide denial-of-service activity and that this
                 article describes the most comprehensive public
                 measurements of such activity to date.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Holman:2006:LUP,
  author =       "Philip Holman and James H. Anderson",
  title =        "Locking under {Pfair} scheduling",
  journal =      j-TOCS,
  volume =       "24",
  number =       "2",
  pages =        "140--174",
  month =        may,
  year =         "2006",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1132026.1132028",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu May 18 08:01:47 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "We present several locking synchronization protocols
                 for Pfair-scheduled multiprocessor systems. We focus on
                 two classes of protocols. The first class is only
                 applicable in systems in which all critical sections
                 are short relative to the length of the scheduling
                 quantum. In this case, efficient synchronization can be
                 achieved by ensuring that all locks have been released
                 before tasks are preempted. This is accomplished by
                 exploiting the quantum-based nature of Pfair
                 scheduling, which provides a priori knowledge of all
                 possible preemption points. The second and more general
                 protocol class is applicable to any system. For this
                 class, we consider the use of a client-server model. We
                 also discuss the viability of inheritance-based
                 protocols in Pfair-scheduled systems.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Lai:2006:PWA,
  author =       "Albert M. Lai and Jason Nieh",
  title =        "On the performance of wide-area thin-client
                 computing",
  journal =      j-TOCS,
  volume =       "24",
  number =       "2",
  pages =        "175--209",
  month =        may,
  year =         "2006",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1132026.1132029",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu May 18 08:01:47 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "While many application service providers have proposed
                 using thin-client computing to deliver computational
                 services over the Internet, little work has been done
                 to evaluate the effectiveness of thin-client computing
                 in a wide-area network. To assess the potential of
                 thin-client computing in the context of future
                 commodity high-bandwidth Internet access, we have used
                 a novel, noninvasive slow-motion benchmarking technique
                 to evaluate the performance of several popular
                 thin-client computing platforms in delivering
                 computational services cross-country over Internet2.
                 Our results show that using thin-client computing in a
                 wide-area network environment can deliver acceptable
                 performance over Internet2, even when client and server
                 are located thousands of miles apart on opposite ends
                 of the country. However, performance varies widely
                 among thin-client platforms and not all platforms are
                 suitable for this environment. While many thin-client
                 systems are touted as being bandwidth efficient, we
                 show that network latency is often the key factor in
                 limiting wide-area thin-client performance.
                 Furthermore, we show that the same techniques used to
                 improve bandwidth efficiency often result in worse
                 overall performance in wide-area networks. We
                 characterize and analyze the different design choices
                 in the various thin-client platforms and explain which
                 of these choices should be selected for supporting
                 wide-area computing services.",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Vachharajani:2006:LSE,
  author =       "Manish Vachharajani and Neil Vachharajani and David A.
                 Penry and Jason A. Blome and Sharad Malik and David I.
                 August",
  title =        "The {Liberty Simulation Environment}: a deliberate
                 approach to high-level system modeling",
  journal =      j-TOCS,
  volume =       "24",
  number =       "3",
  pages =        "211--249",
  month =        aug,
  year =         "2006",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1151690.1151691",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Tue Aug 29 05:29:09 MDT 2006",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Barr:2006:EAL,
author = "Kenneth C. Barr and Krste Asanovi{\'c}",
title = "Energy-aware lossless data compression",
journal = j-TOCS,
volume = "24",
number = "3",
pages = "250--291",
month = aug,
year = "2006",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/1151690.1151692",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Aug 29 05:29:09 MDT 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Wireless transmission of a single bit can require over
1000 times more energy than a single computation. It
can therefore be beneficial to perform additional
computation to reduce the number of bits transmitted.
If the energy required to compress data is less than
the energy required to send it, there is a net energy
savings and an increase in battery life for portable
computers. This article presents a study of the energy
savings possible by losslessly compressing data prior
to transmission. A variety of algorithms were measured
on a StrongARM SA-110 processor. This work demonstrates
that, with several typical compression algorithms,
there is actually a net energy increase when
compression is applied before transmission. Reasons for
this increase are explained and suggestions are made to
avoid it. One such energy-aware suggestion is
asymmetric compression, the use of one compression
algorithm on the transmit side and a different
algorithm for the receive path. By choosing the
lowest-energy compressor and decompressor on the test
platform, overall energy to send and receive data can
be reduced by 11\% compared with a well-chosen
symmetric pair, or up to 57\% over the default
symmetric {\tt zlib} scheme.",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Yuan:2006:EEC,
author = "Wanghong Yuan and Klara Nahrstedt",
title = "Energy-efficient {CPU} scheduling for multimedia
applications",
journal = j-TOCS,
volume = "24",
number = "3",
pages = "292--331",
month = aug,
year = "2006",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Aug 29 05:29:09 MDT 2006",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Swift:2006:RDD,
author = "Michael M. Swift and Muthukaruppan Annamalai and Brian
N. Bershad and Henry M. Levy",
title = "Recovering device drivers",
journal = j-TOCS,
volume = "24",
number = "4",
pages = "333--360",
month = nov,
year = "2006",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 16:06:54 MST 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Nightingale:2006:SED,
author = "Edmund B. Nightingale and Peter M. Chen and Jason
Flinn",
title = "Speculative execution in a distributed file system",
journal = j-TOCS,
volume = "24",
number = "4",
pages = "361--392",
month = nov,
year = "2006",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 16:06:54 MST 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Yang:2006:UMC,
author = "Junfeng Yang and Paul Twohey and Dawson Engler and
Madanlal Musuvathi",
title = "Using model checking to find serious file system
errors",
journal = j-TOCS,
volume = "24",
number = "4",
pages = "393--423",
month = nov,
year = "2006",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 16:06:54 MST 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Higham:2007:SMC,
author = "Lisa Higham and Lillanne Jackson and Jalal Kawash",
title = "Specifying memory consistency of write buffer
multiprocessors",
journal = j-TOCS,
volume = "25",
number = "1",
pages = "1:1--1:??",
month = feb,
year = "2007",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 16:06:55 MST 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
articleno = "1",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Gluhovsky:2007:CME,
author = "Ilya Gluhovsky and David Vengerov and Brian O'Krafka",
title = "Comprehensive multivariate extrapolation modeling of
multiprocessor cache miss rates",
journal = j-TOCS,
volume = "25",
number = "1",
pages = "2:1--2:??",
month = feb,
year = "2007",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 16:06:55 MST 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
articleno = "2",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Behar:2007:TCS,
author = "Michael Behar and Avi Mendelson and Avinoam Kolodny",
title = "Trace cache sampling filter",
journal = j-TOCS,
volume = "25",
number = "1",
pages = "3:1--3:??",
month = feb,
year = "2007",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 16:06:55 MST 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
articleno = "3",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Swanson:2007:WA,
author = "Steven Swanson and Andrew Schwerin and Martha Mercaldi
and Andrew Petersen and Andrew Putnam and Ken Michelson
and Mark Oskin and Susan J. Eggers",
title = "The {WaveScalar} architecture",
journal = j-TOCS,
volume = "25",
number = "2",
pages = "4:1--4:??",
month = may,
year = "2007",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 16:06:56 MST 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
articleno = "4",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Fraser:2007:CPL,
author = "Keir Fraser and Tim Harris",
title = "Concurrent programming without locks",
journal = j-TOCS,
volume = "25",
number = "2",
pages = "5:1--5:??",
month = may,
year = "2007",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 16:06:56 MST 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
articleno = "5",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Appavoo:2007:EDO,
author = "Jonathan Appavoo and Dilma {Da Silva} and Orran
Krieger and Marc Auslander and Michal Ostrowski and
Bryan Rosenburg and Amos Waterland and Robert W.
Wisniewski and Jimi Xenidis and Michael Stumm and Livio
Soares",
title = "Experience distributing objects in an {SMMP OS}",
journal = j-TOCS,
volume = "25",
number = "3",
pages = "6:1--6:??",
month = aug,
year = "2007",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 16:06:57 MST 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
articleno = "6",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Qin:2007:RTB,
author = "Feng Qin and Joseph Tucek and Yuanyuan Zhou and
Jagadeesan Sundaresan",
title = "Rx: {Treating} bugs as allergies---a safe method to
survive software failures",
journal = j-TOCS,
volume = "25",
number = "3",
pages = "7:1--7:??",
month = aug,
year = "2007",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 16:06:57 MST 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
articleno = "7",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Jelasity:2007:GBP,
author = "M{\'a}rk Jelasity and Spyros Voulgaris and Rachid
Guerraoui and Anne-Marie Kermarrec and Maarten van
Steen",
title = "Gossip-based peer sampling",
journal = j-TOCS,
volume = "25",
number = "3",
pages = "8:1--8:??",
month = aug,
year = "2007",
CODEN = "ACSYEC",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 16:06:57 MST 2007",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
articleno = "8",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Xu:2007:MEE,
author = "Ruibin Xu and Daniel Moss{\'e} and Rami Melhem",
title = "Minimizing expected energy consumption in real-time
systems through dynamic voltage scaling",
journal = j-TOCS,
volume = "25",
number = "4",
pages = "9:1--9:??",
month = dec,
year = "2007",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/1314299.1314300",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jun 16 17:52:15 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Many real-time systems, such as battery-operated
embedded devices, are energy constrained. A common
problem for these systems is how to reduce energy
consumption in the system as much as possible while
still meeting the deadlines; a commonly used power
management mechanism by these systems is dynamic
voltage scaling (DVS). Usually, the workloads executed
by these systems are variable and, more often than not,
unpredictable. Because of the unpredictability of the
workloads, one cannot guarantee to minimize the energy
consumption in the system. However, if the variability
of the workloads can be captured by the probability
distribution of the computational requirement of each
task in the system, it is possible to achieve the goal
of minimizing the expected energy consumption in the
system. In this paper, we investigate DVS schemes that
aim at minimizing expected energy consumption for
frame-based hard real-time systems. Our investigation
considers various DVS strategies (i.e., intra-task DVS,
inter-task DVS, and hybrid DVS) and both an ideal
system model (i.e., assuming unrestricted continuous
frequency, well-defined power-frequency relation, and
no speed change overhead) and a realistic system model
(i.e., the processor provides a set of discrete speeds,
no assumption is made on power-frequency relation, and
speed change overhead is considered). The highlights of
the investigation are two practical DVS schemes:
Practical PACE (PPACE) for a single task and Practical
Inter-Task DVS (PITDVS2) for general frame-based
systems. Evaluation results show that our proposed
schemes outperform and achieve significant energy
savings over existing schemes.",
acknowledgement = ack-nhfb,
articleno = "9",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "dynamic voltage scaling; power management; processor
acceleration to conserve energy; real-time",
}
@Article{Hur:2007:MSM,
author = "Ibrahim Hur and Calvin Lin",
title = "Memory scheduling for modern microprocessors",
journal = j-TOCS,
volume = "25",
number = "4",
pages = "10:1--10:??",
month = dec,
year = "2007",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/1314299.1314301",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jun 16 17:52:15 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "The need to carefully schedule memory operations has
increased as memory performance has become increasingly
important to overall system performance. This article
describes the adaptive history-based (AHB) scheduler,
which uses the history of recently scheduled operations
to provide three conceptual benefits: (1) it allows the
scheduler to better reason about the delays associated
with its scheduling decisions, (2) it provides a
mechanism for combining multiple constraints, which is
important for increasingly complex DRAM structures, and
(3) it allows the scheduler to select operations so
that they match the program's mixture of Reads and
Writes, thereby avoiding certain bottlenecks within the
memory controller.\par
We have previously evaluated this scheduler in the
context of the IBM Power5. When compared with the state
of the art, this scheduler improves performance by
15.6\%, 9.9\%, and 7.6\% for the Stream, NAS, and
commercial benchmarks, respectively. This article
expands our understanding of the AHB scheduler in a
variety of ways. Looking backwards, we describe the
scheduler in the context of prior work that focused
exclusively on avoiding bank conflicts, and we show
that the AHB scheduler is superior for the IBM Power5,
which we argue will be representative of future
microprocessor memory controllers. Looking forwards, we
evaluate this scheduler in the context of future
systems by varying a number of microarchitectural
features and hardware parameters. For example, we show
that the benefit of this scheduler increases as we move
to multithreaded environments.",
acknowledgement = ack-nhfb,
articleno = "10",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "adaptive history-based scheduling; memory scheduling;
memory system performance",
}
@Article{Vandebogart:2007:LEP,
author = "Steve Vandebogart and Petros Efstathopoulos and Eddie
Kohler and Maxwell Krohn and Cliff Frey and David
Ziegler and Frans Kaashoek and Robert Morris and David
Mazi{\`e}res",
title = "Labels and event processes in the {Asbestos} operating
system",
journal = j-TOCS,
volume = "25",
number = "4",
pages = "11:1--11:??",
month = dec,
year = "2007",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/1314299.1314302",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jun 16 17:52:15 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Asbestos, a new operating system, provides novel
labeling and isolation mechanisms that help contain the
effects of exploitable software flaws. Applications can
express a wide range of policies with Asbestos's
kernel-enforced labels, including controls on
interprocess communication and system-wide information
flow. A new event process abstraction defines
lightweight, isolated contexts within a single process,
allowing one process to act on behalf of multiple users
while preventing it from leaking any single user's data
to others. A Web server demonstration application uses
these primitives to isolate private user data. Since
the untrusted workers that respond to client requests
are constrained by labels, exploited workers cannot
directly expose user data except as allowed by
application policy. The server application requires 1.4
memory pages per user for up to 145,000 users and
achieves connection rates similar to Apache,
demonstrating that additional security can come at an
acceptable cost.",
acknowledgement = ack-nhfb,
articleno = "11",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "information flow; labels; mandatory access control;
process abstractions; secure Web servers",
}
@Article{Coulson:2008:GCM,
author = "Geoff Coulson and Gordon Blair and Paul Grace and
Fran{\c{c}}ois Taiani and Ackbar Joolia and Kevin Lee
and Jo Ueyama and Thirunavukkarasu Sivaharan",
title = "A generic component model for building systems
software",
journal = j-TOCS,
volume = "26",
number = "1",
pages = "1:1--1:??",
month = feb,
year = "2008",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/1328671.1328672",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jun 16 17:52:22 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Component-based software structuring principles are
now commonplace at the application level; but
componentization is far less established when it comes
to building low-level systems software. Although there
have been pioneering efforts in applying
componentization to systems-building, these efforts
have tended to target specific application domains
(e.g., embedded systems, operating systems,
communications systems, programmable networking
environments, or middleware platforms). They also tend
to be targeted at specific deployment environments
(e.g., standard personal computer (PC) environments,
network processors, or microcontrollers). The
disadvantage of this narrow targeting is that it fails
to maximize the genericity and abstraction potential of
the component approach. In this article, we argue for
the benefits and feasibility of a generic yet
tailorable approach to component-based systems-building
that offers a uniform programming model that is
applicable in a wide range of systems-oriented target
domains and deployment environments. The component
model, called OpenCom, is supported by a reflective
runtime architecture that is itself built from
components. After describing OpenCom and evaluating its
performance and overhead characteristics, we present
and evaluate two case studies of systems we have built
using OpenCom technology, thus illustrating its
benefits and its general applicability.",
acknowledgement = ack-nhfb,
articleno = "1",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "component-based software; computer systems
implementation",
}
@Article{Colohan:2008:IPD,
author = "Christopher B. Colohan and Anastassia Ailamaki and J.
Gregory Steffan and Todd C. Mowry",
title = "Incrementally parallelizing database transactions with
thread-level speculation",
journal = j-TOCS,
volume = "26",
number = "1",
pages = "2:1--2:??",
month = feb,
year = "2008",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/1328671.1328673",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jun 16 17:52:22 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "With the advent of chip multiprocessors, exploiting
intratransaction parallelism in database systems is an
attractive way of improving transaction performance.
However, exploiting intratransaction parallelism is
difficult for two reasons: first, significant changes
are required to avoid races or conflicts within the
DBMS; and second, adding threads to transactions
requires a high level of sophistication from
transaction programmers. In this article we show how
dividing a transaction into speculative threads solves
both problems --- it minimizes the changes required to
the DBMS, and the details of parallelization are hidden
from the transaction programmer. Our technique requires
a limited number of small, localized changes to a
subset of the low-level data structures in the DBMS.
Through this method of incrementally parallelizing
transactions, we can dramatically improve performance:
on a simulated four-processor chip-multiprocessor, we
improve the response time by 44--66\% for three of the
five TPC-C transactions, assuming the availability of
idle processors.",
acknowledgement = ack-nhfb,
articleno = "2",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "chip-multiprocessing; incremental parallelization;
optimistic concurrency; thread-level speculation",
}
@Article{Kostic:2008:HBD,
author = "Dejan Kosti{\'c} and Alex C. Snoeren and Amin Vahdat
and Ryan Braud and Charles Killian and James W.
Anderson and Jeannie Albrecht and Adolfo Rodriguez and
Erik Vandekieft",
title = "High-bandwidth data dissemination for large-scale
distributed systems",
journal = j-TOCS,
volume = "26",
number = "1",
pages = "3:1--3:??",
month = feb,
year = "2008",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/1328671.1328674",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jun 16 17:52:22 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "This article focuses on the multireceiver data
dissemination problem. Initially, IP multicast formed
the basis for efficiently supporting such distribution.
More recently, overlay networks have emerged to support
point-to-multipoint communication. Both techniques
focus on constructing trees rooted at the source to
distribute content among all interested receivers. We
argue, however, that trees have two fundamental
limitations for data dissemination. First, since all
data comes from a single parent, participants must
often continuously probe in search of a parent with an
acceptable level of bandwidth. Second, due to packet
losses and failures, available bandwidth is
monotonically decreasing down the tree.\par
To address these limitations, we present Bullet, a data
dissemination mesh that takes advantage of the
computational and storage capabilities of end hosts to
create a distribution structure where a node receives
data in parallel from multiple peers. For the mesh to
deliver improved bandwidth and reliability, we need to
solve several key problems: (i) disseminating disjoint
data over the mesh, (ii) locating missing content,
(iii) finding who to peer with (peering strategy), (iv)
retrieving data at the right rate from all peers (flow
control), and (v) recovering from failures and adapting
to dynamically changing network conditions.
Additionally, the system should be self-adjusting and
should have few user-adjustable parameter settings. We
describe our approach to addressing all of these
problems in a working, deployed system across the
Internet. Bullet outperforms state-of-the-art systems,
including BitTorrent, by 25--70\% and exhibits strong
performance and reliability in a range of deployment
settings. In addition, we find that, relative to
tree-based solutions, Bullet reduces the need to
perform expensive bandwidth probing.",
acknowledgement = ack-nhfb,
articleno = "3",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "bandwidth; overlays; peer-to-peer",
}
@Article{Chang:2008:BDS,
author = "Fay Chang and Jeffrey Dean and Sanjay Ghemawat and
Wilson C. Hsieh and Deborah A. Wallach and Mike Burrows
and Tushar Chandra and Andrew Fikes and Robert E.
Gruber",
title = "{Bigtable}: a distributed storage system for
structured data",
journal = j-TOCS,
volume = "26",
number = "2",
pages = "4:1--4:??",
month = jun,
year = "2008",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/1365815.1365816",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jun 16 17:52:30 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Bigtable is a distributed storage system for managing
structured data that is designed to scale to a very
large size: petabytes of data across thousands of
commodity servers. Many projects at Google store data
in Bigtable, including web indexing, Google Earth, and
Google Finance. These applications place very different
demands on Bigtable, both in terms of data size (from
URLs to web pages to satellite imagery) and latency
requirements (from backend bulk processing to real-time
data serving). Despite these varied demands, Bigtable
has successfully provided a flexible, high-performance
solution for all of these Google products. In this
article, we describe the simple data model provided by
Bigtable, which gives clients dynamic control over data
layout and format, and we describe the design and
implementation of Bigtable.",
acknowledgement = ack-nhfb,
articleno = "4",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "large-scale distributed storage",
}
@Article{Bar-Yossef:2008:RRW,
author = "Ziv Bar-Yossef and Roy Friedman and Gabriel Kliot",
title = "{RaWMS} --- {Random Walk Based Lightweight Membership
Service} for Wireless Ad Hoc Networks",
journal = j-TOCS,
volume = "26",
number = "2",
pages = "5:1--5:??",
month = jun,
year = "2008",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/1365815.1365817",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jun 16 17:52:30 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "This article presents RaWMS, a novel lightweight
random membership service for ad hoc networks. The
service provides each node with a partial uniformly
chosen view of network nodes. Such a membership service
is useful, for example, in data dissemination
algorithms, lookup and discovery services, peer
sampling services, and complete membership
construction. The design of RaWMS is based on a novel
reverse random walk (RW) sampling technique. The
article includes a formal analysis of both the reverse
RW sampling technique and RaWMS and verifies it through
a detailed simulation study. In addition, RaWMS is
compared both analytically and by simulations with a
number of other known methods such as flooding and
gossip-based techniques.",
acknowledgement = ack-nhfb,
articleno = "5",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "ad hoc networks; membership service; random walk",
}
@Article{Nightingale:2008:RS,
author = "Edmund B. Nightingale and Kaushik Veeraraghavan and
Peter M. Chen and Jason Flinn",
title = "Rethink the sync",
journal = j-TOCS,
volume = "26",
number = "3",
pages = "6:1--6:26",
month = sep,
year = "2008",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/1394441.1394442",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Sep 17 14:28:13 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "We introduce {\em external synchrony}, a new model for
local file I/O that provides the reliability and
simplicity of synchronous I/O, yet also closely
approximates the performance of asynchronous I/O. An
external observer cannot distinguish the output of a
computer with an externally synchronous file system
from the output of a computer with a synchronous file
system. No application modification is required to use
an externally synchronous file system. In fact,
application developers can program to the simpler
synchronous I/O abstraction and still receive excellent
performance. We have implemented an externally
synchronous file system for Linux, called xsyncfs.
Xsyncfs provides the same durability and
ordering-guarantees as those provided by a {\em
synchronously\/} mounted ext3 file system. Yet even for
I/O-intensive benchmarks, xsyncfs performance is within
7\% of ext3 mounted {\em asynchronously}. Compared to
ext3 mounted synchronously, xsyncfs is up to two orders
of magnitude faster.",
acknowledgement = ack-nhfb,
articleno = "6",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "causality; file systems; speculative execution;
synchronous I/O",
}
@Article{Agrawal:2008:AWS,
author = "Kunal Agrawal and Charles E. Leiserson and Yuxiong He
and Wen Jing Hsu",
title = "Adaptive work-stealing with parallelism feedback",
journal = j-TOCS,
volume = "26",
number = "3",
pages = "7:1--7:32",
month = sep,
year = "2008",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/1394441.1394443",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Sep 17 14:28:13 MDT 2008",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Multiprocessor scheduling in a shared multiprogramming
environment can be structured as two-level scheduling,
where a kernel-level job scheduler allots processors to
jobs and a user-level thread scheduler schedules the
work of a job on its allotted processors. We present a
randomized work-stealing thread scheduler for fork-join
multithreaded jobs that provides continual parallelism
feedback to the job scheduler in the form of requests
for processors. Our A-STEAL algorithm is appropriate
for large parallel servers where many jobs share a
common multiprocessor resource and in which the number
of processors available to a particular job may vary
during the job's execution. Assuming that the job
scheduler never allots a job more processors than
requested by the job's thread scheduler, A-STEAL
guarantees that the job completes in near-optimal time
while utilizing at least a constant fraction of the
allotted processors.\par
We model the job scheduler as the thread scheduler's
adversary, challenging the thread scheduler to be
robust to the operating environment as well as to the
job scheduler's administrative policies. For example,
the job scheduler might make a large number of
processors available exactly when the job has little
use for them. To analyze the performance of our
adaptive thread scheduler under this stringent
adversarial assumption, we introduce a new technique
called {\em trim analysis,\/} which allows us to prove
that our thread scheduler performs poorly on no more
than a small number of time steps, exhibiting
near-optimal behavior on the vast majority.\par
More precisely, suppose that a job has work $T_1$ and
span $T_\infty$. On a machine with $P$ processors,
A-STEAL completes the job in an expected duration of
$O(T_1 / \tilde{P} + T_\infty + L \lg P)$ time steps,
where $L$ is the length of a scheduling quantum, and
$\tilde{P}$ denotes the $O(T_\infty + L \lg P)$-trimmed
availability. This quantity is the average of the
processor availability over all time steps except the
$O(T_\infty + L \lg P)$ time steps that have the
highest processor availability. When the job's
parallelism dominates the trimmed availability, that
is, $\tilde{P} \ll T_1 / T_\infty$, the job achieves
nearly perfect linear speedup. Conversely, when the
trimmed mean dominates the parallelism, the asymptotic
running time of the job is nearly the length of its
span, which is optimal.\par
We measured the performance of A-STEAL on a simulated
multiprocessor system using synthetic workloads. For
jobs with sufficient parallelism, our experiments
confirm that A-STEAL provides almost perfect linear
speedup across a variety of processor availability
profiles. We compared A-STEAL with the ABP algorithm,
an adaptive work-stealing thread scheduler developed by
Arora et al. [1998] which does not employ parallelism
feedback. On moderately to heavily loaded machines with
large numbers of processors, A-STEAL typically
completed jobs more than twice as quickly as ABP,
despite being allotted the same number or fewer
processors on every step, while wasting only 10\% of
the processor cycles wasted by ABP.",
acknowledgement = ack-nhfb,
articleno = "7",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
keywords = "adaptive scheduling; adversary; instantaneous
parallelism; job scheduling; multiprocessing;
multiprogramming; parallel computation; parallelism
feedback; processor allocation; randomized algorithm;
space sharing; span; thread scheduling; trim analysis;
two-level scheduling; work; work-stealing",
}
@Article{Shieh:2008:SAC,
  author          = {Alan Shieh and Andrew C. Myers and Emin G{\"u}n Sirer},
  title           = {A stateless approach to connection-oriented protocols},
  journal         = j-TOCS,
  volume          = {26},
  number          = {3},
  pages           = {8:1--8:50},
  month           = sep,
  year            = {2008},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1394441.1394444},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Wed Sep 17 14:28:13 MDT 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract        = {Traditional operating system interfaces and network
    protocol implementations force some system state to be
    kept on both sides of a connection. This state ties the
    connection to its endpoints, impedes transparent
    failover, permits denial-of-service attacks, and limits
    scalability. This article introduces a novel TCP-like
    transport protocol and a new interface to replace
    sockets that together enable all state to be kept on
    one endpoint, allowing the other endpoint, typically
    the server, to operate without any per-connection
    state. Called {\em Trickles}, this approach enables
    servers to scale well with increasing numbers of
    clients, consume fewer resources, and better resist
    denial-of-service attacks. Measurements on a full
    implementation in Linux indicate that Trickles achieves
    performance comparable to TCP/IP, interacts well with
    other flows, and scales well. Trickles also enables
    qualitatively different kinds of networked services.
    Services can be geographically replicated and contacted
    through an anycast primitive for improved availability
    and performance. Widely-deployed practices that
    currently have client-observable side effects, such as
    periodic server reboots, connection redirection, and
    failover, can be made transparent, and perform well,
    under Trickles. The protocol is secure against
    tampering and replay attacks, and the client interface
    is backward-compatible, requiring no changes to
    sockets-based client applications.},
  acknowledgement = ack-nhfb,
  articleno       = {8},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords        = {stateless interfaces; stateless protocols},
}
@Article{Costa:2008:VEE,
  author          = {Manuel Costa and Jon Crowcroft and Miguel Castro
    and Antony Rowstron and Lidong Zhou and Lintao Zhang
    and Paul Barham},
  title           = {{Vigilante}: End-to-end containment of {Internet}
    worm epidemics},
  journal         = j-TOCS,
  volume          = {26},
  number          = {4},
  pages           = {9:1--9:??},
  month           = dec,
  year            = {2008},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1455258.1455259},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Tue Dec 23 13:36:21 MST 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract        = {Worm containment must be automatic because worms can
    spread too fast for humans to respond. Recent work
    proposed network-level techniques to automate worm
    containment; these techniques have limitations because
    there is no information about the vulnerabilities
    exploited by worms at the network level. We propose
    Vigilante, a new end-to-end architecture to contain
    worms automatically that addresses these
    limitations.\par
    In Vigilante, hosts detect worms by instrumenting
    vulnerable programs to analyze infection attempts. We
    introduce {\em dynamic data-flow analysis\/}: a
    broad-coverage host-based algorithm that can detect
    unknown worms by tracking the flow of data from network
    messages and disallowing unsafe uses of this data. We
    also show how to integrate other host-based detection
    mechanisms into the Vigilante architecture. Upon
    detection, hosts generate {\em self-certifying
    alerts\/} (SCAs), a new type of security alert that can
    be inexpensively verified by any vulnerable host. Using
    SCAs, hosts can cooperate to contain an outbreak,
    without having to trust each other. Vigilante
    broadcasts SCAs over an overlay network that propagates
    alerts rapidly and resiliently. Hosts receiving an SCA
    protect themselves by generating filters with {\em
    vulnerability condition slicing\/}: an algorithm that
    performs dynamic analysis of the vulnerable program to
    identify control-flow conditions that lead to
    successful attacks. These filters block the worm attack
    and all its polymorphic mutations that follow the
    execution path identified by the SCA.\par
    Our results show that Vigilante can contain
    fast-spreading worms that exploit unknown
    vulnerabilities, and that Vigilante's filters introduce
    a negligible performance overhead. Vigilante does not
    require any changes to hardware, compilers, operating
    systems, or the source code of vulnerable programs;
    therefore, it can be used to protect current software
    binaries.},
  acknowledgement = ack-nhfb,
  articleno       = {9},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords        = {dynamic data-flow analysis; program analysis;
    self-certifying alerts; vulnerability condition slicing;
    Worm containment},
}
@Article{Qiao:2008:IPP,
  author          = {Yi Qiao and Fabi{\'a}n E. Bustamante and Peter A. Dinda
    and Stefan Birrer and Dong Lu},
  title           = {Improving peer-to-peer performance through
    server-side scheduling},
  journal         = j-TOCS,
  volume          = {26},
  number          = {4},
  pages           = {10:1--10:??},
  month           = dec,
  year            = {2008},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1455258.1455260},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Tue Dec 23 13:36:21 MST 2008},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract        = {We show how to significantly improve the mean response
    time seen by both uploaders and downloaders in
    peer-to-peer data-sharing systems. Our work is
    motivated by the observation that response times are
    largely determined by the performance of the peers
    serving the requested objects, that is, by the peers in
    their capacity as servers. With this in mind, we take a
    close look at this {\em server side\/} of peers,
    characterizing its workload by collecting and examining
    an extensive set of traces. Using trace-driven
    simulation, we demonstrate the promise and potential
    problems with scheduling policies based on
    shortest-remaining-processing-time (SRPT), the
    algorithm known to be optimal for minimizing mean
    response time. The key challenge to using SRPT in this
    context is determining request service times. In
    addressing this challenge, we introduce two new
    estimators that enable {\em predictive\/} SRPT
    scheduling policies that closely approach the
    performance of ideal SRPT. We evaluate our approach
    through extensive single-server and system-level
    simulation coupled with real Internet deployment and
    experimentation.},
  acknowledgement = ack-nhfb,
  articleno       = {10},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords        = {Peer-to-peer; scheduling; server-side;
    size-based scheduling; SRPT},
}
@Article{Choi:2009:HCS,
  author =       "Seungryul Choi and Donald Yeung",
  title =        "Hill-climbing {SMT} processor resource distribution",
  journal =      j-TOCS,
  volume =       "27",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2009",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1482619.1482620",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Feb 13 18:30:25 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "The key to high performance in Simultaneous
                 MultiThreaded (SMT) processors lies in optimizing the
                 distribution of shared resources to active threads.
                 Existing resource distribution techniques optimize
                 performance only indirectly. They infer potential
                 performance bottlenecks by observing indicators, like
                 instruction occupancy or cache miss counts, and take
                 actions to try to alleviate them. While the corrective
                 actions are designed to improve performance, their
                 actual performance impact is not known since end
                 performance is never monitored. Consequently, potential
                 performance gains are lost whenever the corrective
                 actions do not effectively address the actual
                 bottlenecks occurring in the pipeline.\par
                 We propose a different approach to SMT resource
                 distribution that optimizes end performance directly.
                 Our approach observes the impact that resource
                 distribution decisions have on performance at runtime,
                 and feeds this information back to the resource
                 distribution mechanisms to improve future decisions. By
                 evaluating many different resource distributions, our
                 approach tries to learn the best distribution over
                 time. Because we perform learning online, learning time
                 is crucial. We develop a hill-climbing algorithm that
                 quickly learns the best distribution of resources by
                 following the performance gradient within the resource
                 distribution space. We also develop several ideal
                 learning algorithms to enable deeper insights through
                 limit studies.\par
                 This article conducts an in-depth investigation of
                 hill-climbing SMT resource distribution using a
                 comprehensive suite of 63 multiprogrammed workloads.
                 Our results show hill-climbing outperforms ICOUNT,
                 FLUSH, and DCRA (three existing SMT techniques) by
                 11.4\%, 11.5\%, and 2.8\%, respectively, under the
                 weighted IPC metric. A limit study conducted using our
                 ideal learning algorithms shows our approach can
                 potentially outperform the same techniques by 19.2\%,
                 18.0\%, and 7.6\%, respectively, thus demonstrating
                 additional room exists for further improvement. Using
                 our ideal algorithms, we also identify three
                 bottlenecks that limit online learning speed: local
                 maxima, phased behavior, and interepoch jitter. We
                 define metrics to quantify these learning bottlenecks,
                 and characterize the extent to which they occur in our
                 workloads. Finally, we conduct a sensitivity study, and
                 investigate several extensions to improve our
                 hill-climbing technique.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Ntarmos:2009:DHS,
  author =       "N. Ntarmos and P. Triantafillou and G. Weikum",
  title =        "Distributed hash sketches: {Scalable}, efficient, and
                 accurate cardinality estimation for distributed
                 multisets",
  journal =      j-TOCS,
  volume =       "27",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2009",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1482619.1482621",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Feb 13 18:30:25 MST 2009",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Counting items in a distributed system, and estimating
                 the cardinality of multisets in particular, is
                 important for a large variety of applications and a
                 fundamental building block for emerging Internet-scale
                 information systems. Examples of such applications
                 range from optimizing query access plans in
                 peer-to-peer data sharing, to computing the
                 significance (rank/score) of data items in distributed
                 information retrieval. The general formal problem
                 addressed in this article is computing the network-wide
                 distinct number of items with some property (e.g.,
                 distinct files with file name containing ``spiderman'')
                 where each node in the network holds an arbitrary
                 subset, possibly overlapping the subsets of other
                 nodes. The key requirements that a viable approach must
                 satisfy are: (1) scalability towards very large network
                 size, (2) efficiency regarding messaging overhead, (3)
                 load balance of storage and access, (4) accuracy of the
                 cardinality estimation, and (5) simplicity and easy
                 integration in applications. This article contributes
                 the DHS (Distributed Hash Sketches) method for this
                 problem setting: a distributed, scalable, efficient,
                 and accurate multiset cardinality estimator. DHS is
                 based on hash sketches for probabilistic counting, but
                 distributes the bits of each counter across network
                 nodes in a judicious manner based on principles of
                 Distributed Hash Tables, paying careful attention to
                 fast access and aggregation as well as update costs.
                 The article discusses various design choices,
                 exhibiting tunable trade-offs between estimation
                 accuracy, hop-count efficiency, and load distribution
                 fairness. We further contribute a full-fledged,
                 publicly available, open-source implementation of all
                 our methods, and a comprehensive experimental
                 evaluation for various settings.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Eyerman:2009:MPM,
  author          = {Stijn Eyerman and Lieven Eeckhout and Tejas Karkhanis
    and James E. Smith},
  title           = {A mechanistic performance model for superscalar
    out-of-order processors},
  journal         = j-TOCS,
  volume          = {27},
  number          = {2},
  pages           = {3:1--3:??},
  month           = may,
  year            = {2009},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1534909.1534910},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Wed May 27 15:56:17 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract        = {A mechanistic model for out-of-order superscalar
    processors is developed and then applied to the study
    of microarchitecture resource scaling. The model
    divides execution time into intervals separated by
    disruptive miss events such as branch mispredictions
    and cache misses. Each type of miss event results in
    characterizable performance behavior for the execution
    time interval. By considering an interval's type and
    length (measured in instructions), execution time can
    be predicted for the interval. Overall execution time
    is then determined by aggregating the execution time
    over all intervals. The mechanistic model provides
    several advantages over prior modeling approaches, and,
    when estimating performance, it differs from detailed
    simulation of a 4-wide out-of-order processor by an
    average of 7\%.\par
    The mechanistic model is applied to the general problem
    of resource scaling in out-of-order superscalar
    processors. First, we use the model to determine size
    relationships among microarchitecture structures in a
    balanced processor design. Second, we use the
    mechanistic model to study scaling of both pipeline
    depth and width in balanced processor designs. We
    corroborate previous results in this area and provide
    new results. For example, we show that at optimal
    design points, the pipeline depth times the square root
    of the processor width is nearly constant. Finally, we
    consider the behavior of unbalanced, overprovisioned
    processor designs based on insight gained from the
    mechanistic model. We show that in certain situations
    an overprovisioned processor may lead to improved
    overall performance. Designs where a processor's
    dispatch width is wider than its issue width are of
    particular interest.},
  acknowledgement = ack-nhfb,
  articleno       = {3},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords        = {analytical modeling; balanced processor design;
    mechanistic modeling; overprovisioned processor design;
    performance modeling; pipeline depth; pipeline width;
    resource scaling; Superscalar out-of-order processor;
    wide front-end dispatch processors},
}
@Article{Zagorodnov:2009:PLO,
  author          = {Dmitrii Zagorodnov and Keith Marzullo
    and Lorenzo Alvisi and Thomas C. Bressoud},
  title           = {Practical and low-overhead masking of failures
    of {TCP}-based servers},
  journal         = j-TOCS,
  volume          = {27},
  number          = {2},
  pages           = {4:1--4:??},
  month           = may,
  year            = {2009},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1534909.1534911},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Wed May 27 15:56:17 MDT 2009},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract        = {This article describes an architecture that allows a
    replicated service to survive crashes without breaking
    its TCP connections. Our approach does not require
    modifications to the TCP protocol, to the operating
    system on the server, or to any of the software running
    on the clients. Furthermore, it runs on commodity
    hardware. We compare two implementations of this
    architecture (one based on primary/backup replication
    and another based on message logging) focusing on
    scalability, failover time, and application
    transparency. We evaluate three types of services: a
    file server, a Web server, and a multimedia streaming
    server. Our experiments suggest that the approach
    incurs low overhead on throughput, scales well as the
    number of clients increases, and allows recovery of the
    service in near-optimal time.},
  acknowledgement = ack-nhfb,
  articleno       = {4},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords        = {Fault-tolerant computing system;
    primary/backup approach; TCP/IP},
}
@Article{Aguilera:2009:SNP,
  author          = {Marcos K. Aguilera and Arif Merchant and Mehul Shah
    and Alistair Veitch and Christos Karamanolis},
  title           = {{Sinfonia}: a new paradigm for building scalable
    distributed systems},
  journal         = j-TOCS,
  volume          = {27},
  number          = {3},
  pages           = {5:1--5:48},
  month           = nov,
  year            = {2009},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1629087.1629088},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Mon Mar 15 09:06:12 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract        = {We propose a new paradigm for building scalable
    distributed systems. Our approach does not require
    dealing with message-passing protocols, a major
    complication in existing distributed systems. Instead,
    developers just design and manipulate data structures
    within our service called Sinfonia. Sinfonia keeps data
    for applications on a set of memory nodes, each
    exporting a linear address space. At the core of
    Sinfonia is a new minitransaction primitive that
    enables efficient and consistent access to data, while
    hiding the complexities that arise from concurrency and
    failures. Using Sinfonia, we implemented two very
    different and complex applications in a few months: a
    cluster file system and a group communication service.
    Our implementations perform well and scale to hundreds
    of machines.},
  acknowledgement = ack-nhfb,
  articleno       = {5},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords        = {Distributed systems; fault tolerance; scalability;
    shared memory; transactions; two-phase commit},
}
@Article{Cherkasova:2009:AAD,
  author =       "Ludmila Cherkasova and Kivanc Ozonat and Ningfang Mi
                 and Julie Symons and Evgenia Smirni",
  title =        "Automated anomaly detection and performance modeling
                 of enterprise applications",
  journal =      j-TOCS,
  volume =       "27",
  number =       "3",
  pages =        "6:1--6:32",
  month =        nov,
  year =         "2009",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1629087.1629089",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon Mar 15 09:06:12 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Automated tools for understanding application behavior
                 and its changes during the application lifecycle are
                 essential for many performance analysis and debugging
                 tasks. Application performance issues have an immediate
                 impact on customer experience and satisfaction. A
                 sudden slowdown of enterprise-wide application can
                 affect a large population of customers, lead to delayed
                 projects, and ultimately can result in company
                 financial loss. Significantly shortened time between
                 new software releases further exacerbates the problem
                 of thoroughly evaluating the performance of an updated
                 application. Our thesis is that online performance
                 modeling should be a part of routine application
                 monitoring. Early, informative warnings on significant
                 changes in application performance should help service
                 providers to timely identify and prevent performance
                 problems and their negative impact on the service. We
                 propose a novel framework for automated anomaly
                 detection and application change analysis. It is based
                 on integration of two complementary techniques: (i) a
                 regression-based transaction model that reflects a
                 resource consumption model of the application, and (ii)
                 an application performance signature that provides a
                 compact model of runtime behavior of the application.
                 The proposed integrated framework provides a simple and
                 powerful solution for anomaly detection and analysis of
                 essential performance changes in application behavior.
                 An additional benefit of the proposed approach is its
                 simplicity: It is not intrusive and is based on
                 monitoring data that is typically available in
                 enterprise production environments. The introduced
                 solution further enables the automation of capacity
                 planning and resource provisioning tasks of multitier
                 applications in rapidly evolving IT environments.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "Anomaly detection; capacity planning; multitier
                 applications; online algorithms; performance modeling",
}
@Article{Kotla:2009:ZSB,
  author          = {Ramakrishna Kotla and Lorenzo Alvisi and Mike Dahlin
    and Allen Clement and Edmund Wong},
  title           = {{Zyzzyva}: {Speculative Byzantine} fault tolerance},
  journal         = j-TOCS,
  volume          = {27},
  number          = {4},
  pages           = {7:1--7:39},
  month           = dec,
  year            = {2009},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1658357.1658358},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Mon Mar 15 09:06:46 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract        = {A longstanding vision in distributed systems is to
    build reliable systems from unreliable components. An
    enticing formulation of this vision is Byzantine
    Fault-Tolerant (BFT) state machine replication, in
    which a group of servers collectively act as a correct
    server even if some of the servers misbehave or
    malfunction in arbitrary (``Byzantine'') ways. Despite
    this promise, practitioners hesitate to deploy BFT
    systems, at least partly because of the perception that
    BFT must impose high overheads.\par
    In this article, we present Zyzzyva, a protocol that
    uses speculation to reduce the cost of BFT replication.
    In Zyzzyva, replicas reply to a client's request
    without first running an expensive three-phase commit
    protocol to agree on the order to process requests.
    Instead, they optimistically adopt the order proposed
    by a primary server, process the request, and reply
    immediately to the client. If the primary is faulty,
    replicas can become temporarily inconsistent with one
    another, but clients detect inconsistencies, help
    correct replicas converge on a single total ordering of
    requests, and only rely on responses that are
    consistent with this total order. This approach allows
    Zyzzyva to reduce replication overheads to near their
    theoretical minima and to achieve throughputs of tens
    of thousands of requests per second, making BFT
    replication practical for a broad range of demanding
    services.},
  acknowledgement = ack-nhfb,
  articleno       = {7},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords        = {Byzantine fault tolerance; output commit;
    replication; speculative execution},
}
@Article{Vera:2009:SRL,
  author          = {Xavier Vera and Jaume Abella and Javier Carretero
    and Antonio Gonz{\'a}lez},
  title           = {Selective replication: a lightweight technique
    for soft errors},
  journal         = j-TOCS,
  volume          = {27},
  number          = {4},
  pages           = {8:1--8:30},
  month           = dec,
  year            = {2009},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1658357.1658359},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Mon Mar 15 09:06:46 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract        = {Soft errors are an important challenge in contemporary
    microprocessors. Modern processors have caches and
    large memory arrays protected by parity or error
    detection and correction codes. However, today's
    failure rate is dominated by flip flops, latches, and
    the increasing sensitivity of combinational logic to
    particle strikes. Moreover, as Chip Multi-Processors
    (CMPs) become ubiquitous, meeting the FIT budget for
    new designs is becoming a major
    challenge.\par
    Solutions based on replicating threads have been
    explored deeply; however, their high cost in
    performance and energy make them unsuitable for current
    designs. Moreover, our studies based on a typical
    configuration for a modern processor show that focusing
    on the top 5 most vulnerable structures can provide up
    to 70\% reduction in FIT rate. Therefore, full
    replication may overprotect the chip by reducing the
    FIT much below budget.\par
    We propose {\em Selective Replication}, a
    lightweight-reconfigurable mechanism that achieves a
    high FIT reduction by protecting the most vulnerable
    instructions with minimal performance and energy
    impact. Low performance degradation is achieved by not
    requiring additional issue slots and reissuing
    instructions only during the time window between when
    they are retirable and they actually retire. Coverage
    can be reconfigured online by replicating only a subset
    of the instructions (the most vulnerable ones).
    Instructions' vulnerability is estimated based on the
    area they occupy and the time they spend in the issue
    queue. By changing the vulnerability threshold, we can
    adjust the trade-off between coverage and performance
    loss.\par
    Results for an out-of-order processor configured
    similarly to Intel{\reg} Core\TM{} Micro-Architecture
    show that our scheme can achieve over 65\% FIT
    reduction with less than 4\% performance degradation
    with small area and complexity overhead.},
  acknowledgement = ack-nhfb,
  articleno       = {8},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords        = {AVF prediction; FIT reduction;
    redundant multithreading; Soft errors},
}
@Article{Chen:2010:E,
  author          = {Peter M. Chen},
  title           = {Editorial},
  journal         = j-TOCS,
  volume          = {28},
  number          = {1},
  pages           = {1:1--1:??},
  month           = mar,
  year            = {2010},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1731060.1731061},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Mon Apr 5 12:44:43 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  acknowledgement = ack-nhfb,
  articleno       = {1},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
}
@Article{Yabandeh:2010:PPI,
  author          = {Maysam Yabandeh and Nikola Kne{\v{z}}evi{\'c}
    and Dejan Kosti{\'c} and Viktor Kuncak},
  title           = {Predicting and preventing inconsistencies in
    deployed distributed systems},
  journal         = j-TOCS,
  volume          = {28},
  number          = {1},
  pages           = {2:1--2:??},
  month           = mar,
  year            = {2010},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1731060.1731062},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Mon Apr 5 12:44:43 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract        = {We propose a new approach for developing and deploying
    distributed systems, in which nodes predict distributed
    consequences of their actions and use this information
    to detect and avoid errors. Each node continuously runs
    a state exploration algorithm on a recent consistent
    snapshot of its neighborhood and predicts possible
    future violations of specified safety properties. We
    describe a new state exploration algorithm, consequence
    prediction, which explores causally related chains of
    events that lead to property violation.\par
    This article describes the design and implementation of
    this approach, termed CrystalBall. We evaluate
    CrystalBall on RandTree, BulletPrime, Paxos, and Chord
    distributed system implementations. We identified new
    bugs in mature Mace implementations of three systems.
    Furthermore, we show that if the bug is not corrected
    during system development, CrystalBall is effective in
    steering the execution away from inconsistent states at
    runtime.},
  acknowledgement = ack-nhfb,
  articleno       = {2},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords        = {consequence prediction; Distributed systems;
    enforcing safety properties; execution steering; reliability},
}
@Article{Walfish:2010:DDO,
  author          = {Michael Walfish and Mythili Vutukuru
    and Hari Balakrishnan and David Karger and Scott Shenker},
  title           = {{DDoS} defense by offense},
  journal         = j-TOCS,
  volume          = {28},
  number          = {1},
  pages           = {3:1--3:??},
  month           = mar,
  year            = {2010},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1731060.1731063},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Mon Apr 5 12:44:43 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract        = {This article presents the design, implementation,
    analysis, and experimental evaluation of {\em
    speak-up}, a defense against {\em application-level\/}
    distributed denial-of-service (DDoS), in which
    attackers cripple a server by sending
    legitimate-looking requests that consume computational
    resources (e.g., CPU cycles, disk). With speak-up, a
    victimized server encourages all clients, resources
    permitting, {\em to automatically send higher volumes
    of traffic}. We suppose that attackers are already
    using most of their upload bandwidth so cannot react to
    the encouragement. Good clients, however, have spare
    upload bandwidth so can react to the encouragement with
    drastically higher volumes of traffic. The intended
    outcome of this traffic inflation is that the good
    clients crowd out the bad ones, thereby capturing a
    much larger fraction of the server's resources than
    before. We experiment under various conditions and find
    that speak-up causes the server to spend resources on a
    group of clients in rough proportion to their aggregate
    upload bandwidths, which is the intended result.},
  acknowledgement = ack-nhfb,
  articleno       = {3},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords        = {bandwidth; currency; DoS attack},
}
@Article{Roeder:2010:PO,
  author          = {Tom Roeder and Fred B. Schneider},
  title           = {Proactive obfuscation},
  journal         = j-TOCS,
  volume          = {28},
  number          = {2},
  pages           = {4:1--4:??},
  month           = jul,
  year            = {2010},
  CODEN           = {ACSYEC},
  DOI             = {https://doi.org/10.1145/1813654.1813655},
  ISSN            = {0734-2071 (print), 1557-7333 (electronic)},
  ISSN-L          = {0734-2071},
  bibdate         = {Thu Jul 22 12:42:28 MDT 2010},
  bibsource       = {http://www.acm.org/pubs/contents/journals/tocs/;
    https://www.math.utah.edu/pub/tex/bib/tocs.bib},
  abstract        = {{\em Proactive obfuscation\/} is a new method for
    creating server replicas that are likely to have fewer
    shared vulnerabilities. It uses semantics-preserving
    code transformations to generate diverse executables,
    periodically restarting servers with these fresh
    versions. The periodic restarts help bound the number
    of compromised replicas that a service ever
    concurrently runs, and therefore proactive obfuscation
    makes an adversary's job harder. Proactive obfuscation
    was used in implementing two prototypes: a distributed
    firewall based on state-machine replication and a
    distributed storage service based on quorum systems.
    Costs intrinsic to supporting proactive obfuscation in
    replicated systems were evaluated by measuring the
    performance of these prototypes. The results show that
    employing proactive obfuscation adds little to the cost
    of replica-management protocols.},
  acknowledgement = ack-nhfb,
  articleno       = {4},
  journal-URL     = {http://portal.acm.org/browse_dl.cfm?idx=J774},
  keywords        = {Byzantine fault tolerance; distributed systems;
    proactive recovery; quorum systems; state machine approach},
}
@Article{Guerraoui:2010:TOT,
  author =       "Rachid Guerraoui and Ron R. Levy and Bastian Pochon
                 and Vivien Qu{\'e}ma",
  title =        "Throughput optimal total order broadcast for cluster
                 environments",
  journal =      j-TOCS,
  volume =       "28",
  number =       "2",
  pages =        "5:1--5:??",
  month =        jul,
  year =         "2010",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1813654.1813656",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jul 22 12:42:28 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Total order broadcast is a fundamental communication
                 primitive that plays a central role in bringing cheap
                 software-based high availability to a wide range of
                 services. This article studies the practical
                 performance of such a primitive on a cluster of
                 homogeneous machines.\par
                 We present LCR, the first throughput optimal uniform
                 total order broadcast protocol. LCR is based on a ring
                 topology. It only relies on point-to-point
                 inter-process communication and has a linear latency
                 with respect to the number of processes. LCR is also
                 fair in the sense that each process has an equal
                 opportunity of having its messages delivered by all
                 processes.\par
                 We benchmark a C implementation of LCR against Spread
                 and JGroups, two of the most widely used group
                 communication packages. LCR provides higher throughput
                 than the alternatives, over a large number of
                 scenarios.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "cluster computing; replication; software
                 fault-tolerance; total order broadcast",
}
@Article{Amir:2010:SWM,
  author =       "Yair Amir and Claudiu Danilov and Raluca
                 Mus{\u{a}}loiu-Elefteri and Nilo Rivera",
  title =        "The {SMesh} wireless mesh network",
  journal =      j-TOCS,
  volume =       "28",
  number =       "3",
  pages =        "6:1--6:??",
  month =        sep,
  year =         "2010",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1841313.1841314",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Sep 30 09:01:34 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Wireless mesh networks extend the connectivity range
                 of mobile devices by using multiple access points, some
                 of them connected to the Internet, to create a mesh
                 topology and forward packets over multiple wireless
                 hops. However, the quality of service provided by the
                 mesh is impaired by the delays and disconnections
                 caused by handoffs, as clients move within the area
                 covered by multiple access points. We present the
                 architecture and protocols of SMesh, the first
                 transparent wireless mesh system that offers seamless,
                 fast handoff, supporting real-time applications such as
                 interactive VoIP. The handoff and routing logic is done
                 solely by the access points, and therefore connectivity
                 is attainable by any 802.11 device. In SMesh, the
                 entire mesh network is seen by the mobile clients as a
                 single, omnipresent access point, giving the mobile
                 clients the illusion that they are stationary. We use
                 multicast for access points coordination and, during
                 handoff transitions, we use more than one access point
                 to handle the moving client. SMesh provides a hybrid
                 routing protocol that optimizes routes over wireless
                 and wired links in a multihomed environment.
                 Experimental results on a fully deployed mesh network
                 demonstrate the effectiveness of the SMesh architecture
                 and its intra-domain and inter-domain handoff
                 protocols.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "fast handoff; inter-domain; intra-domain;
                 micromobility; Wireless mesh networks",
}
@Article{Friedman:2010:PQS,
  author =       "Roy Friedman and Gabriel Kliot and Chen Avin",
  title =        "Probabilistic quorum systems in wireless {Ad Hoc}
                 networks",
  journal =      j-TOCS,
  volume =       "28",
  number =       "3",
  pages =        "7:1--7:??",
  month =        sep,
  year =         "2010",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1841313.1841315",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Sep 30 09:01:34 MDT 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Quorums are a basic construct in solving many
                 fundamental distributed computing problems. One of the
                 known ways of making quorums scalable and efficient is
                 by weakening their intersection guarantee to being
                 probabilistic. This article explores several access
                 strategies for implementing probabilistic quorums in ad
                 hoc networks. In particular, we present the first
                 detailed study of asymmetric probabilistic biquorum
                 systems, that allow to mix different access strategies
                 and different quorums sizes, while guaranteeing the
                 desired intersection probability. We show the
                 advantages of asymmetric probabilistic biquorum systems
                 in ad hoc networks. Such an asymmetric construction is
                 also useful for other types of networks with nonuniform
                 access costs (e.g., peer-to-peer networks). The article
                 includes a formal analysis of these approaches backed
                 up by an extensive simulation-based study. The study
                 explores the impact of various parameters such as
                 network size, network density, mobility speed, and
                 churn. In particular, we show that one of the
                 strategies that uses random walks exhibits the smallest
                 communication overhead, thus being very attractive for
                 ad hoc networks.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
  keywords =     "Distributed middleware; location service; quorums
                 systems; random walks; wireless ad hoc networks",
}
@Article{Blagodurov:2010:CAS,
  author =       "Sergey Blagodurov and Sergey Zhuravlev and Alexandra
                 Fedorova",
  title =        "Contention-Aware Scheduling on Multicore Systems",
  journal =      j-TOCS,
  volume =       "28",
  number =       "4",
  pages =        "8:1--8:??",
  month =        dec,
  year =         "2010",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1880018.1880019",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Dec 23 17:06:32 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Contention for shared resources on multicore
                 processors remains an unsolved problem in existing
                 systems despite significant research efforts dedicated
                 to this problem in the past. Previous solutions focused
                 primarily on hardware techniques and software page
                 coloring to mitigate this problem. Our goal is to
                 investigate how and to what extent contention for
                 shared resource can be mitigated via thread scheduling.
                 Scheduling is an attractive tool, because it does not
                 require extra hardware and is relatively easy to
                 integrate into the system. Our study is the first to
                 provide a comprehensive analysis of
                 contention-mitigating techniques that use only
                 scheduling. The most difficult part of the problem is
                 to find a classification scheme for threads, which
                 would determine how they affect each other when
                 competing for shared resources.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Cheung:2010:LBC,
  author =       "Alex King Yeung Cheung and Hans-Arno Jacobsen",
  title =        "Load Balancing Content-Based Publish\slash Subscribe
                 Systems",
  journal =      j-TOCS,
  volume =       "28",
  number =       "4",
  pages =        "9:1--9:??",
  month =        dec,
  year =         "2010",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1880018.1880020",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Dec 23 17:06:32 MST 2010",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Distributed content-based publish/subscribe systems
                 suffer from performance degradation and poor
                 scalability caused by uneven load distributions typical
                 in real-world applications. The reason for this
                 shortcoming is the lack of a load balancing scheme.
                 This article proposes a load balancing solution
                 specifically tailored to the needs of content-based
                 publish/subscribe systems that is distributed, dynamic,
                 adaptive, transparent, and accommodates heterogeneity.
                 The solution consists of three key contributions: a
                 load balancing framework, a novel load estimation
                 algorithm, and three offload strategies. A working
                 prototype of our solution is built on an open-sourced
                 content-based publish/subscribe system and evaluated on
                 PlanetLab, a cluster testbed, and in simulations.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Kim:2011:SSE,
  author =       "Changhoon Kim and Matthew Caesar and Jennifer
                 Rexford",
  title =        "{SEATTLE}: a {Scalable Ethernet Architecture for Large
                 Enterprises}",
  journal =      j-TOCS,
  volume =       "29",
  number =       "1",
  pages =        "1:1--1:35",
  month =        feb,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1925109.1925110",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon Feb 28 16:17:43 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "IP networks today require massive effort to configure
                 and manage. Ethernet is vastly simpler to manage, but
                 does not scale beyond small local area networks. This
                 article describes an alternative network architecture
                 called SEATTLE that achieves the best of both worlds:
                 The scalability of IP combined with the simplicity of
                 Ethernet. SEATTLE provides plug-and-play functionality
                 via flat addressing, while ensuring scalability and
                 efficiency through shortest-path routing and hash-based
                 resolution of host information. In contrast to previous
                 work on identity-based routing, SEATTLE ensures path
                 predictability, controllability, and stability, thus
                 simplifying key network-management operations, such as
                 capacity planning, traffic engineering, and
                 troubleshooting. We performed a simulation study driven
                 by real-world traffic traces and network topologies,
                 and used Emulab to evaluate a prototype of our design
                 based on the Click and XORP open-source routing
                 platforms.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Lagar-Cavilla:2011:SVM,
  author =       "H. Andr{\'e}s Lagar-Cavilla and Joseph A. Whitney and
                 Roy Bryant and Philip Patchin and Michael Brudno and
                 Eyal de Lara and Stephen M. Rumble and M.
                 Satyanarayanan and Adin Scannell",
  title =        "{SnowFlock}: Virtual Machine Cloning as a First-Class
                 Cloud Primitive",
  journal =      j-TOCS,
  volume =       "29",
  number =       "1",
  pages =        "2:1--2:45",
  month =        feb,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1925109.1925111",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon Feb 28 16:17:43 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "A basic building block of cloud computing is
                 virtualization. Virtual machines (VMs) encapsulate a
                 user's computing environment and efficiently isolate it
                 from that of other users. VMs, however, are large
                 entities, and no clear APIs exist yet to provide users
                 with programmatic, fine-grained control on short time
                 scales. We present SnowFlock, a paradigm and system for
                 cloud computing that introduces VM cloning as a
                 first-class cloud abstraction. VM cloning exploits the
                 well-understood and effective semantics of UNIX fork.
                 We demonstrate multiple usage models of VM cloning:
                 users can incorporate the primitive in their code, can
                 wrap around existing toolchains via scripting, can
                 encapsulate the API within a parallel programming
                 framework, or can use it to load-balance and self-scale
                 clustered servers.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Meisner:2011:PSA,
  author =       "David Meisner and Brian T. Gold and Thomas F.
                 Wenisch",
  title =        "The {PowerNap} Server Architecture",
  journal =      j-TOCS,
  volume =       "29",
  number =       "1",
  pages =        "3:1--3:24",
  month =        feb,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1925109.1925112",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon Feb 28 16:17:43 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Data center power consumption is growing to
                 unprecedented levels: the EPA estimates U.S. data
                 centers will consume 100 billion kilowatt hours
                 annually by 2011. Much of this energy is wasted in idle
                 systems: in typical deployments, server utilization is
                 below 30\%, but idle servers still consume 60\% of
                 their peak power draw. Typical idle periods---though
                 frequent---last seconds or less, confounding simple
                 energy-conservation approaches. In this article, we
                 propose PowerNap, an energy-conservation approach where
                 the entire system transitions rapidly between a
                 high-performance active state and a near-zero-power
                 idle state in response to instantaneous load. Rather
                 than requiring fine-grained power-performance states
                 and complex load-proportional operation from individual
                 system components, PowerNap instead calls for
                 minimizing idle power and transition time, which are
                 simpler optimization goals.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Gupta:2011:DTD,
  author =       "Diwaker Gupta and Kashi Venkatesh Vishwanath and
                 Marvin McNett and Amin Vahdat and Ken Yocum and Alex
                 Snoeren and Geoffrey M. Voelker",
  title =        "{DieCast}: Testing Distributed Systems with an
                 Accurate Scale Model",
  journal =      j-TOCS,
  volume =       "29",
  number =       "2",
  pages =        "4:1--4:??",
  month =        may,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1963559.1963560",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon May 9 16:05:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Large-scale network services can consist of tens of
                 thousands of machines running thousands of unique
                 software configurations spread across hundreds of
                 physical networks. Testing such services for complex
                 performance problems and configuration errors remains a
                 difficult problem. Existing testing techniques, such as
                 simulation or running smaller instances of a service,
                 have limitations in predicting overall service behavior
                 at such scales. Testing large services should ideally
                 be done at the same scale and configuration as the
                 target deployment, which can be technically and
                 economically infeasible. We present DieCast, an
                 approach to scaling network services in which we
                 multiplex all of the nodes in a given service
                 configuration as virtual machines across a much smaller
                 number of physical machines in a test harness.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Yadgar:2011:MMM,
  author =       "Gala Yadgar and Michael Factor and Kai Li and Assaf
                 Schuster",
  title =        "Management of Multilevel, Multiclient Cache
                 Hierarchies with Application Hints",
  journal =      j-TOCS,
  volume =       "29",
  number =       "2",
  pages =        "5:1--5:??",
  month =        may,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1963559.1963561",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon May 9 16:05:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Multilevel caching, common in many storage
                 configurations, introduces new challenges to
                 traditional cache management: data must be kept in the
                 appropriate cache and replication avoided across the
                 various cache levels. Additional challenges are
                 introduced when the lower levels of the hierarchy are
                 shared by multiple clients. Sharing can have both
                 positive and negative effects. While data fetched by
                 one client can be used by another client without
                 incurring additional delays, clients competing for
                 cache buffers can evict each other's blocks and
                 interfere with exclusive caching schemes. We present a
                 global noncentralized, dynamic and informed management
                 policy for multiple levels of cache, accessed by
                 multiple clients.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{deBruijn:2011:ATS,
  author =       "Willem de Bruijn and Herbert Bos and Henri Bal",
  title =        "Application-Tailored {I/O} with {Streamline}",
  journal =      j-TOCS,
  volume =       "29",
  number =       "2",
  pages =        "6:1--6:??",
  month =        may,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/1963559.1963562",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Mon May 9 16:05:59 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Streamline is a stream-based OS communication
                 subsystem that spans from peripheral hardware to
                 userspace processes. It improves performance of
                 I/O-bound applications (such as webservers and
                 streaming media applications) by constructing
                 tailor-made I/O paths through the operating system for
                 each application at runtime. Path optimization removes
                 unnecessary copying, context switching and cache
                 replacement and integrates specialized hardware.
                 Streamline automates optimization and only presents
                 users a clear, concise job control language based on
                 Unix pipelines. For backward compatibility Streamline
                 also presents well known files, pipes and sockets
                 abstractions.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Ayari:2011:DPR,
  author =       "Brahim Ayari and Abdelmajid Khelil and Neeraj Suri",
  title =        "On the design of perturbation-resilient atomic commit
                 protocols for mobile transactions",
  journal =      j-TOCS,
  volume =       "29",
  number =       "3",
  pages =        "7:1--7:??",
  month =        aug,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/2003690.2003691",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Aug 24 18:08:12 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Distributed mobile transactions utilize commit
                 protocols to achieve atomicity and consistent
                 decisions. This is challenging, as mobile environments
                 are typically characterized by frequent perturbations
                 such as network disconnections and node failures. On
                 one hand environmental constraints on mobile
                 participants and wireless links may increase the
                 resource blocking time of fixed participants. On the
                 other hand frequent node and link failures complicate
                 the design of atomic commit protocols by increasing
                 both the transaction abort rate and resource blocking
                 time. Hence, the deployment of classical commit
                 protocols (such as two-phase commit) does not
                 reasonably extend to distributed infrastructure-based
                 mobile environments driving the need for
                 perturbation-resilient commit protocols.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Kalibera:2011:SRT,
  author =       "Tomas Kalibera and Filip Pizlo and Antony L. Hosking
                 and Jan Vitek",
  title =        "Scheduling real-time garbage collection on
                 uniprocessors",
  journal =      j-TOCS,
  volume =       "29",
  number =       "3",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/2003690.2003692",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Aug 24 18:08:12 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Managed languages such as Java and C\# are
                 increasingly being considered for hard real-time
                 applications because of their productivity and software
                 engineering advantages. Automatic memory management, or
                 garbage collection, is a key enabler for robust,
                 reusable libraries, yet remains a challenge for
                 analysis and implementation of real-time execution
                 environments. This article comprehensively compares
                 leading approaches to hard real-time garbage
                 collection. There are many design decisions involved in
                 selecting a real-time garbage collection algorithm. For
                 time-based garbage collectors on uniprocessors one must
                 choose whether to use periodic, slack-based or hybrid
                 scheduling. A significant impediment to valid
                 experimental comparison of such choices is that
                 commercial implementations use completely different
                 proprietary infrastructures.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Reddi:2011:MPE,
  author =       "Vijay Janapa Reddi and Benjamin C. Lee and Trishul
                 Chilimbi and Kushagra Vaid",
  title =        "Mobile processors for energy-efficient web search",
  journal =      j-TOCS,
  volume =       "29",
  number =       "3",
  pages =        "9:1--9:??",
  month =        aug,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/2003690.2003693",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Wed Aug 24 18:08:12 MDT 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "As cloud and utility computing spreads, computer
                 architects must ensure continued capability growth for
                 the data centers that comprise the cloud. Given
                 megawatt scale power budgets, increasing data center
                 capability requires increasing computing hardware
                 energy efficiency. To increase the data center's
                 capability for work, the work done per Joule must
                 increase. We pursue this efficiency even as the nature
                 of data center applications evolves. Unlike traditional
                 enterprise workloads, which are typically memory or I/O
                 bound, big data computation and analytics exhibit
                 greater compute intensity. This article examines the
                 efficiency of mobile processors as a means for data
                 center capability. In particular, we compare and
                 contrast the performance and efficiency of the
                 Microsoft Bing search engine executing on the
                 mobile-class Atom processor and the server-class Xeon
                 processor.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Srivatsa:2011:ESA,
  author =       "Mudhakar Srivatsa and Ling Liu and Arun Iyengar",
  title =        "{EventGuard}: a System Architecture for Securing
                 Publish--Subscribe Networks",
  journal =      j-TOCS,
  volume =       "29",
  number =       "4",
  pages =        "10:1--10:??",
  month =        dec,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/2063509.2063510",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Dec 30 17:52:02 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Publish-subscribe (pub-sub) is an emerging paradigm
                 for building a large number of distributed systems. A
                 wide area pub-sub system is usually implemented on an
                 overlay network infrastructure to enable information
                 dissemination from publishers to subscribers. Using an
                 open overlay network raises several security concerns
                 such as: confidentiality and integrity, authentication,
                 authorization and Denial-of-Service (DoS) attacks. In
                 this article we present EventGuard, a framework for
                 building secure wide-area pub-sub systems. The
                 EventGuard architecture is comprised of three key
                 components: (1) a suite of security guards that can be
                 seamlessly plugged-into a content-based pub-sub system,
                 (2) a scalable key management algorithm to enforce
                 access control on subscribers, and (3) a resilient
                 pub-sub network design that is capable of scalable
                 routing, handling message dropping-based DoS attacks,
                 and node failures.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Marinescu:2011:ETR,
  author =       "Paul D. Marinescu and George Candea",
  title =        "Efficient Testing of Recovery Code Using Fault
                 Injection",
  journal =      j-TOCS,
  volume =       "29",
  number =       "4",
  pages =        "11:1--11:??",
  month =        dec,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/2063509.2063511",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Dec 30 17:52:02 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "A critical part of developing a reliable software
                 system is testing its recovery code. This code is
                 traditionally difficult to test in the lab, and, in the
                 field, it rarely gets to run; yet, when it does run, it
                 must execute flawlessly in order to recover the system
                 from failure. In this article, we present a
                 library-level fault injection engine that enables the
                 productive use of fault injection for software testing.
                 We describe automated techniques for reliably
                 identifying errors that applications may encounter when
                 interacting with their environment, for automatically
                 identifying high-value injection targets in program
                 binaries, and for producing efficient injection test
                 scenarios.",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Mahajan:2011:DCS,
  author =       "Prince Mahajan and Srinath Setty and Sangmin Lee and
                 Allen Clement and Lorenzo Alvisi and Mike Dahlin and
                 Michael Walfish",
  title =        "{Depot}: Cloud Storage with Minimal Trust",
  journal =      j-TOCS,
  volume =       "29",
  number =       "4",
  pages =        "12:1--12:??",
  month =        dec,
  year =         "2011",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/2063509.2063512",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Dec 30 17:52:02 MST 2011",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "This article describes the design, implementation, and
                 evaluation of Depot, a cloud storage system that
                 minimizes trust assumptions. Depot tolerates buggy or
                 malicious behavior by any number of clients or servers,
                 yet it provides safety and liveness guarantees to
                 correct clients. Depot provides these guarantees using
                 a two-layer architecture. First, Depot ensures that the
                 updates observed by correct nodes are consistently
                 ordered under Fork-Join-Causal consistency (FJC). FJC
                 is a slight weakening of causal consistency that can be
                 both safe and live despite faulty nodes. Second, Depot
                 implements protocols that use this consistent ordering
                 of updates to provide other desirable consistency,
                 staleness, durability, and recovery properties.",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Mowry:2012:ISI,
  author =       "Todd C. Mowry",
  title =        "Introduction to Special Issue {ASPLOS 2011}",
  journal =      j-TOCS,
  volume =       "30",
  number =       "1",
  pages =        "1:1--1:??",
  month =        feb,
  year =         "2012",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/2110356.2110357",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Mar 1 16:31:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Chipounov:2012:SPD,
  author =       "Vitaly Chipounov and Volodymyr Kuznetsov and George
                 Candea",
  title =        "The {S2E} Platform: Design, Implementation, and
                 Applications",
  journal =      j-TOCS,
  volume =       "30",
  number =       "1",
  pages =        "2:1--2:??",
  month =        feb,
  year =         "2012",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/2110356.2110358",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Mar 1 16:31:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "This article presents S2E, a platform for analyzing
                 the properties and behavior of software systems, along
                 with its use in developing tools for comprehensive
                 performance profiling, reverse engineering of
                 proprietary software, and automated testing of
                 kernel-mode and user-mode binaries. Conceptually, S2E
                 is an automated path explorer with modular path
                 analyzers: the explorer uses a symbolic execution
                 engine to drive the target system down all execution
                 paths of interest, while analyzers measure and/or check
                 properties of each such path. S2E users can either
                 combine existing analyzers to build custom analysis
                 tools, or they can directly use S2E's APIs. S2E's
                 strength is the ability to scale to large systems, such
                 as a full Windows stack, using two new ideas: selective
                 symbolic execution, a way to automatically minimize the
                 amount of code that has to be executed symbolically
                 given a target analysis, and execution consistency
                 models, a way to make principled performance/accuracy
                 trade-offs \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Veeraraghavan:2012:DPS,
  author =       "Kaushik Veeraraghavan and Dongyoon Lee and Benjamin
                 Wester and Jessica Ouyang and Peter M. Chen and Jason
                 Flinn and Satish Narayanasamy",
  title =        "{DoublePlay}: Parallelizing Sequential Logging and
                 Replay",
  journal =      j-TOCS,
  volume =       "30",
  number =       "1",
  pages =        "3:1--3:??",
  month =        feb,
  year =         "2012",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/2110356.2110359",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Mar 1 16:31:38 MST 2012",
  bibsource =    "http://www.acm.org/pubs/contents/journals/tocs/;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  abstract =     "Deterministic replay systems record and reproduce the
                 execution of a hardware or software system. In contrast
                 to replaying execution on uniprocessors, deterministic
                 replay on multiprocessors is very challenging to
                 implement efficiently because of the need to reproduce
                 the order of or the values read by shared memory
                 operations performed by multiple threads. In this
                 paper, we present DoublePlay, a new way to efficiently
                 guarantee replay on commodity multiprocessors. Our key
                 insight is that one can use the simpler and faster
                 mechanisms of single-processor record and replay, yet
                 still achieve the scalability offered by multiple
                 cores, by using an additional execution to parallelize
                 the record and replay of an application.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Yuan:2012:ISD,
author = "Ding Yuan and Jing Zheng and Soyeon Park and Yuanyuan
Zhou and Stefan Savage",
title = "Improving Software Diagnosability via Log
Enhancement",
journal = j-TOCS,
volume = "30",
number = "1",
pages = "4:1--4:??",
month = feb,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2110356.2110360",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Mar 1 16:31:38 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Diagnosing software failures in the field is
notoriously difficult, in part due to the fundamental
complexity of troubleshooting any complex software
system, but further exacerbated by the paucity of
information that is typically available in the
production setting. Indeed, for reasons of both
overhead and privacy, it is common that only the
run-time log generated by a system (e.g., syslog) can
be shared with the developers. Unfortunately, the
ad-hoc nature of such reports are frequently
insufficient for detailed failure diagnosis. This paper
seeks to improve this situation within the rubric of
existing practice. We describe a tool, LogEnhancer that
automatically ``enhances'' existing logging code to aid
in future post-failure debugging.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Schupbach:2012:DLA,
author = "Adrian Sch{\"u}pbach and Andrew Baumann and Timothy
Roscoe and Simon Peter",
title = "A Declarative Language Approach to Device
Configuration",
journal = j-TOCS,
volume = "30",
number = "1",
pages = "5:1--5:??",
month = feb,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2110356.2110361",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Mar 1 16:31:38 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "C remains the language of choice for hardware
programming (device drivers, bus configuration, etc.):
it is fast, allows low-level access, and is trusted by
OS developers. However, the algorithms required to
configure and reconfigure hardware devices and
interconnects are becoming more complex and diverse,
with the added burden of legacy support, ``quirks,''
and hardware bugs to work around. Even programming PCI
bridges in a modern PC is a surprisingly complex
problem, and is getting worse as new functionality such
as hotplug appears. Existing approaches use relatively
simple algorithms, hard-coded in C and closely coupled
with low-level register access code, generally leading
to suboptimal configurations.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Saez:2012:LCS,
author = "Juan Carlos Saez and Alexandra Fedorova and David
Koufaty and Manuel Prieto",
title = "Leveraging Core Specialization via {OS} Scheduling to
Improve Performance on Asymmetric Multicore Systems",
journal = j-TOCS,
volume = "30",
number = "2",
pages = "6:1--6:??",
month = apr,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2166879.2166880",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Fri Apr 27 12:10:22 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Asymmetric multicore processors (AMPs) consist of
cores with the same ISA (instruction-set architecture),
but different microarchitectural features, speed, and
power consumption. Because cores with more complex
features and higher speed typically use more area and
consume more energy relative to simpler and slower
cores, we must use these cores for running applications
that experience significant performance improvements
from using those features. Having cores of different
types in a single system allows optimizing the
performance/energy trade-off. To deliver this potential
to unmodified applications, the OS scheduler must map
threads to cores in consideration of the properties of
both. Our work describes a Comprehensive scheduler for
Asymmetric Multicore Processors (CAMP) that addresses
shortcomings of previous asymmetry-aware schedulers.
First, previous schedulers catered to only one kind of
workload properties that are crucial for scheduling on
AMPs; either efficiency or thread-level parallelism
(TLP), but not both. CAMP overcomes this limitation
showing how using both efficiency and TLP in synergy in
a single scheduling algorithm can improve performance.
Second, most existing schedulers relying on models for
estimating how much faster a thread executes on a
``fast'' vs. ``slow'' core (i.e., the speedup factor)
were specifically designed for AMP systems where cores
differ only in clock frequency. However, more realistic
AMP systems include cores that differ more
significantly in their features. To demonstrate the
effectiveness of CAMP on more realistic scenarios, we
augmented the CAMP scheduler with a model that predicts
the speedup factor on a real AMP prototype that closely
matches future asymmetric systems.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Ebrahimi:2012:FST,
author = "Eiman Ebrahimi and Chang Joo Lee and Onur Mutlu and
Yale N. Patt",
title = "Fairness via Source Throttling: a Configurable and
High-Performance Fairness Substrate for Multicore
Memory Systems",
journal = j-TOCS,
volume = "30",
number = "2",
pages = "7:1--7:??",
month = apr,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2166879.2166881",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Fri Apr 27 12:10:22 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Cores in chip-multiprocessors (CMPs) share multiple
memory subsystem resources. If resource sharing is
unfair, some applications can be delayed significantly
while others are unfairly prioritized. Previous
research proposed separate fairness mechanisms for each
resource. Such resource-based fairness mechanisms
implemented independently in each resource can make
contradictory decisions, leading to low fairness and
performance loss. Therefore, a coordinated mechanism
that provides fairness in the entire shared memory
system is desirable. This article proposes a new
approach that provides fairness in the entire shared
memory system, thereby eliminating the need for and
complexity of developing fairness mechanisms for each
resource. Our technique, Fairness via Source Throttling
(FST), estimates unfairness in the entire memory
system. If unfairness is above a system-software-set
threshold, FST throttles down cores causing unfairness
by limiting the number of requests they create and the
frequency at which they do. As such, our source-based
fairness control ensures fairness decisions are made in
tandem in the entire memory system. FST enforces thread
priorities/weights, and enables system-software to
enforce different fairness objectives in the memory
system. Our evaluations show that FST provides the best
system fairness and performance compared to three
systems with state-of-the-art fairness mechanisms
implemented in both shared caches and memory
controllers.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Gebhart:2012:HTS,
author = "Mark Gebhart and Daniel R. Johnson and David Tarjan
and Stephen W. Keckler and William J. Dally and Erik
Lindholm and Kevin Skadron",
title = "A Hierarchical Thread Scheduler and Register File for
Energy-Efficient Throughput Processors",
journal = j-TOCS,
volume = "30",
number = "2",
pages = "8:1--8:??",
month = apr,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2166879.2166882",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Fri Apr 27 12:10:22 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Modern graphics processing units (GPUs) employ a large
number of hardware threads to hide both function unit
and memory access latency. Extreme multithreading
requires a complex thread scheduler as well as a large
register file, which is expensive to access both in
terms of energy and latency. We present two
complementary techniques for reducing energy on
massively-threaded processors such as GPUs. First, we
investigate a two-level thread scheduler that maintains
a small set of active threads to hide ALU and local
memory access latency and a larger set of pending
threads to hide main memory latency. Reducing the
number of threads that the scheduler must consider each
cycle improves the scheduler's energy efficiency.
Second, we propose replacing the monolithic register
file found on modern designs with a hierarchical
register file. We explore various trade-offs for the
hierarchy including the number of levels in the
hierarchy and the number of entries at each level. We
consider both a hardware-managed caching scheme and a
software-managed scheme, where the compiler is
responsible for orchestrating all data movement within
the register file hierarchy. Combined with a
hierarchical register file, our two-level thread
scheduler provides a further reduction in energy by
only allocating entries in the upper levels of the
register file hierarchy for active threads. Averaging
across a variety of real world graphics and compute
workloads, the active thread count can be reduced by a
factor of 4 with minimal impact on performance and our
most efficient three-level software-managed register
file hierarchy reduces register file energy by 54\%.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Dall:2012:DIE,
author = "Christoffer Dall and Jeremy Andrus and Alexander
{Van't Hof} and Oren Laadan and Jason Nieh",
title = "The Design, Implementation, and Evaluation of {Cells}:
a Virtual {Smartphone} Architecture",
journal = j-TOCS,
volume = "30",
number = "3",
pages = "9:1--9:??",
month = aug,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2324876.2324877",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Aug 20 16:33:58 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Smartphones are increasingly ubiquitous, and many
users carry multiple phones to accommodate work,
personal, and geographic mobility needs. We present
Cells, a virtualization architecture for enabling
multiple virtual smartphones to run simultaneously on
the same physical cellphone in an isolated, secure
manner. Cells introduces a usage model of having one
foreground virtual phone and multiple background
virtual phones. This model enables a new device
namespace mechanism and novel device proxies that
integrate with lightweight operating system
virtualization to multiplex phone hardware across
multiple virtual phones while providing native hardware
device performance. Cells virtual phone features
include fully accelerated 3D graphics, complete power
management features, and full telephony functionality
with separately assignable telephone numbers and caller
ID support. We have implemented a prototype of Cells
that supports multiple Android virtual phones on the
same phone. Our performance results demonstrate that
Cells imposes only modest runtime and memory overhead,
works seamlessly across multiple hardware devices
including Google Nexus 1 and Nexus S phones, and
transparently runs Android applications at native speed
without any modifications.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Harter:2012:FFU,
author = "Tyler Harter and Chris Dragga and Michael Vaughn and
Andrea C. Arpaci-Dusseau and Remzi H. Arpaci-Dusseau",
title = "A File Is Not a File: Understanding the {I/O} Behavior
of {Apple} Desktop Applications",
journal = j-TOCS,
volume = "30",
number = "3",
pages = "10:1--10:??",
month = aug,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2324876.2324878",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Aug 20 16:33:58 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "We analyze the I/O behavior of iBench, a new
collection of productivity and multimedia application
workloads. Our analysis reveals a number of differences
between iBench and typical file-system workload
studies, including the complex organization of modern
files, the lack of pure sequential access, the
influence of underlying frameworks on I/O patterns, the
widespread use of file synchronization and atomic
operations, and the prevalence of threads. Our results
have strong ramifications for the design of next
generation local and cloud-based storage systems.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Esmaeilzadeh:2012:PLD,
author = "Hadi Esmaeilzadeh and Emily Blem and Ren{\'e}e {St.
Amant} and Karthikeyan Sankaralingam and Doug Burger",
title = "Power Limitations and Dark Silicon Challenge the
Future of Multicore",
journal = j-TOCS,
volume = "30",
number = "3",
pages = "11:1--11:??",
month = aug,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2324876.2324879",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Aug 20 16:33:58 MDT 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Since 2004, processor designers have increased core
counts to exploit Moore's Law scaling, rather than
focusing on single-core performance. The failure of
Dennard scaling, to which the shift to multicore parts
is partially a response, may soon limit multicore
scaling just as single-core scaling has been curtailed.
This paper models multicore scaling limits by combining
device scaling, single-core scaling, and multicore
scaling to measure the speedup potential for a set of
parallel workloads for the next five technology
generations. For device scaling, we use both the ITRS
projections and a set of more conservative device
scaling parameters. To model single-core scaling, we
combine measurements from over 150 processors to derive
Pareto-optimal frontiers for area/performance and
power/performance. Finally, to model multicore scaling,
we build a detailed performance model of upper-bound
performance and lower-bound core power. The multicore
designs we study include single-threaded CPU-like and
massively threaded GPU-like multicore chip
organizations with symmetric, asymmetric, dynamic, and
composed topologies. The study shows that regardless of
chip organization and topology, multicore scaling is
power limited to a degree not widely appreciated by the
computing community. Even at 22 nm (just one year from
now), 21\% of a fixed-size chip must be powered off,
and at 8 nm, this number grows to more than 50\%.
Through 2024, only 7.9$\times$ average speedup is
possible across commonly used parallel workloads for
the topologies we study, leaving a nearly 24-fold gap
from a target of doubled performance per generation.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Bugnion:2012:BVX,
author = "Edouard Bugnion and Scott Devine and Mendel Rosenblum
and Jeremy Sugerman and Edward Y. Wang",
title = "Bringing Virtualization to the x86 Architecture with
the Original {VMware Workstation}",
journal = j-TOCS,
volume = "30",
number = "4",
pages = "12:1--12:51",
month = nov,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2382553.2382554",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 19:34:49 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "This article describes the historical context,
technical challenges, and main implementation
techniques used by VMware Workstation to bring
virtualization to the x86 architecture in 1999.
Although virtual machine monitors (VMMs) had been
around for decades, they were traditionally designed as
part of monolithic, single-vendor architectures with
explicit support for virtualization. In contrast, the
x86 architecture lacked virtualization support, and the
industry around it had disaggregated into an ecosystem,
with different vendors controlling the computers, CPUs,
peripherals, operating systems, and applications, none
of them asking for virtualization. We chose to build
our solution independently of these vendors. As a
result, VMware Workstation had to deal with new
challenges associated with (i) the lack of
virtualization support in the x86 architecture, (ii)
the daunting complexity of the architecture itself,
(iii) the need to support a broad combination of
peripherals, and (iv) the need to offer a simple user
experience within existing environments. These new
challenges led us to a novel combination of well-known
virtualization techniques, techniques from other
domains, and new techniques. VMware Workstation
combined a hosted architecture with a VMM. The hosted
architecture enabled a simple user experience and
offered broad hardware compatibility. Rather than
exposing I/O diversity to the virtual machines, VMware
Workstation also relied on software emulation of I/O
devices. The VMM combined a trap-and-emulate direct
execution engine with a system-level dynamic binary
translator to efficiently virtualize the x86
architecture and support most commodity operating
systems. By relying on x86 hardware segmentation as a
protection mechanism, the binary translator could
execute translated code at near hardware speeds. The
binary translator also relied on partial evaluation and
adaptive retranslation to reduce the overall overheads
of virtualization. Written with the benefit of
hindsight, this article shares the key lessons we
learned from building the original system and from its
later evolution.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Erlingsson:2012:FED,
author = "{\'U}lfar Erlingsson and Marcus Peinado and Simon
Peter and Mihai Budiu and Gloria Mainar-Ruiz",
title = "{Fay}: Extensible Distributed Tracing from Kernels to
Clusters",
journal = j-TOCS,
volume = "30",
number = "4",
pages = "13:1--13:??",
month = nov,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2382553.2382555",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 19:34:49 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Fay is a flexible platform for the efficient
collection, processing, and analysis of software
execution traces. Fay provides dynamic tracing through
use of runtime instrumentation and distributed
aggregation within machines and across clusters. At the
lowest level, Fay can be safely extended with new
tracing primitives, including even untrusted, fully
optimized machine code, and Fay can be applied to
running user-mode or kernel-mode software without
compromising system stability. At the highest level,
Fay provides a unified, declarative means of specifying
what events to trace, as well as the aggregation,
processing, and analysis of those events. We have
implemented the Fay tracing platform for Windows and
integrated it with two powerful, expressive systems for
distributed programming. Our implementation is easy to
use, can be applied to unmodified production systems,
and provides primitives that allow the overhead of
tracing to be greatly reduced, compared to previous
dynamic tracing platforms. To show the generality of
Fay tracing, we reimplement, in experiments, a range of
tracing strategies and several custom mechanisms from
existing tracing frameworks. Fay shows that modern
techniques for high-level querying and data-parallel
processing of disaggregated data streams are well
suited to comprehensive monitoring of software
execution in distributed systems. Revisiting a lesson
from the late 1960s [Deutsch and Grant 1971], Fay also
demonstrates the efficiency and extensibility benefits
of using safe, statically verified machine code as the
basis for low-level execution tracing. Finally, Fay
establishes that, by automatically deriving optimized
query plans and code for safe extensions, the
expressiveness and performance of high-level tracing
queries can equal or even surpass that of specialized
monitoring tools.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Gandhi:2012:ADR,
author = "Anshul Gandhi and Mor Harchol-Balter and Ram
Raghunathan and Michael A. Kozuch",
title = "{AutoScale}: Dynamic, Robust Capacity Management for
Multi-Tier Data Centers",
journal = j-TOCS,
volume = "30",
number = "4",
pages = "14:1--14:??",
month = nov,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2382553.2382556",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 19:34:49 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Energy costs for data centers continue to rise,
already exceeding \$15 billion yearly. Sadly much of
this power is wasted. Servers are only busy 10--30\% of
the time on average, but they are often left on, while
idle, utilizing 60\% or more of peak power when in the
idle state. We introduce a dynamic capacity management
policy, AutoScale, that greatly reduces the number of
servers needed in data centers driven by unpredictable,
time-varying load, while meeting response time SLAs.
AutoScale scales the data center capacity, adding or
removing servers as needed. AutoScale has two key
features: (i) it autonomically maintains just the right
amount of spare capacity to handle bursts in the
request rate; and (ii) it is robust not just to changes
in the request rate of real-world traces, but also
request size and server efficiency. We evaluate our
dynamic capacity management approach via implementation
on a 38-server multi-tier data center, serving a web
site of the type seen in Facebook or Amazon, with a
key-value store workload. We demonstrate that AutoScale
vastly improves upon existing dynamic capacity
management policies with respect to meeting SLAs and
robustness.",
acknowledgement = ack-nhfb,
articleno = "14",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Ferdman:2012:QMB,
author = "Michael Ferdman and Almutaz Adileh and Onur Kocberber
and Stavros Volos and Mohammad Alisafaee and Djordje
Jevdjic and Cansu Kaynak and Adrian Daniel Popescu and
Anastasia Ailamaki and Babak Falsafi",
title = "Quantifying the Mismatch between Emerging Scale-Out
Applications and Modern Processors",
journal = j-TOCS,
volume = "30",
number = "4",
pages = "15:1--15:??",
month = nov,
year = "2012",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2382553.2382557",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Nov 29 19:34:49 MST 2012",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Emerging scale-out workloads require extensive amounts
of computational resources. However, data centers using
modern server hardware face physical constraints in
space and power, limiting further expansion and calling
for improvements in the computational density per
server and in the per-operation energy. Continuing to
improve the computational resources of the cloud while
staying within physical constraints mandates optimizing
server efficiency to ensure that server hardware
closely matches the needs of scale-out workloads. In
this work, we introduce CloudSuite, a benchmark suite
of emerging scale-out workloads. We use performance
counters on modern servers to study scale-out
workloads, finding that today's predominant processor
microarchitecture is inefficient for running these
workloads. We find that inefficiency comes from the
mismatch between the workload needs and modern
processors, particularly in the organization of
instruction and data memory systems and the processor
core microarchitecture. Moreover, while today's
predominant microarchitecture is inefficient when
executing scale-out workloads, we find that continuing
the current trends will further exacerbate the
inefficiency in the future. In this work, we identify
the key microarchitectural needs of scale-out
workloads, calling for a change in the trajectory of
server processors that would lead to improved
computational density and power efficiency in data
centers.",
acknowledgement = ack-nhfb,
articleno = "15",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Wu:2013:ERD,
author = "Meng-Ju Wu and Donald Yeung",
title = "Efficient Reuse Distance Analysis of Multicore Scaling
for Loop-Based Parallel Programs",
journal = j-TOCS,
volume = "31",
number = "1",
pages = "1:1--1:??",
month = feb,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2427631.2427632",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Feb 23 06:37:57 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Reuse Distance (RD) analysis is a powerful memory
analysis tool that can potentially help architects
study multicore processor scaling. One key obstacle,
however, is that multicore RD analysis requires
measuring Concurrent Reuse Distance (CRD) and
Private-LRU-stack Reuse Distance (PRD) profiles across
thread-interleaved memory reference streams.
Sensitivity to memory interleaving makes CRD and PRD
profiles architecture dependent, preventing them from
analyzing different processor configurations. For
loop-based parallel programs, CRD and PRD profiles
shift coherently across RD values with core count
scaling because interleaving threads are symmetric.
Simple techniques can predict such shifting, making the
analysis of numerous multicore configurations from a
small set of CRD and PRD profiles feasible. Given the
ubiquity of parallel loops, such techniques will be
extremely valuable for studying future large multicore
designs. This article investigates using RD analysis to
efficiently analyze multicore cache performance for
loop-based parallel programs, making several
contributions. First, we provide an in-depth analysis
on how CRD and PRD profiles change with core count
scaling. Second, we develop techniques to predict CRD
and PRD profile scaling, in particular employing
reference groups [Zhong et al. 2003] to predict
coherent shift, demonstrating 90\% or greater
prediction accuracy. Third, our CRD and PRD profile
analyses define two application parameters with
architectural implications: C$_{core}$ is the minimum
shared cache capacity that ``contains'' locality
degradation due to core count scaling, and C$_{share}$
is the capacity at which shared caches begin to provide
a cache-miss reduction compared to private caches. And
fourth, we apply CRD and PRD profiles to analyze
multicore cache performance. When combined with
existing problem scaling prediction, our techniques can
predict shared LLC MPKI (private L2 cache MPKI) to
within 10.7\% (13.9\%) of simulation across 1,728
(1,440) configurations using only 36 measured CRD (PRD)
profiles.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Govindan:2013:ADP,
author = "Sriram Govindan and Di Wang and Anand Sivasubramaniam
and Bhuvan Urgaonkar",
title = "Aggressive Datacenter Power Provisioning with
Batteries",
journal = j-TOCS,
volume = "31",
number = "1",
pages = "2:1--2:??",
month = feb,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2427631.2427633",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Feb 23 06:37:57 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Datacenters spend \$10--\$25 per watt in
provisioning their power infrastructure, regardless of
the watts actually consumed. Since peak power needs
arise rarely, provisioning power infrastructure for
them can be expensive. One can, thus, aggressively
underprovision infrastructure assuming that
simultaneous peak draw across all equipment will happen
rarely. The resulting nonzero probability of emergency
events where power needs exceed provisioned capacity,
however small, mandates graceful reaction mechanisms to
cap the power draw instead of leaving it to disruptive
circuit breakers/fuses. Existing strategies for power
capping use temporal knobs local to a server that
throttle the rate of execution (using power modes),
and/or spatial knobs that redirect/migrate excess load
to regions of the datacenter with more power headroom.
We show these mechanisms to have performance degrading
ramifications, and propose an entirely orthogonal
solution that leverages existing UPS batteries to
temporarily augment the utility supply during
emergencies. We build an experimental prototype to
demonstrate such power capping on a cluster of 8
servers, each with an individual battery, and implement
several online heuristics in the context of different
datacenter workloads to evaluate their effectiveness in
handling power emergencies. We show that our
battery-based solution can: (i) handle emergencies of
short durations on its own, (ii) supplement existing
reaction mechanisms to enhance their efficacy for
longer emergencies, and (iii) create more slack for
shifting applications temporarily to nonpeak
durations.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Rasmussen:2013:TBE,
author = "Alexander Rasmussen and George Porter and Michael
Conley and Harsha V. Madhyastha and Radhika Niranjan
Mysore and Alexander Pucher and Amin Vahdat",
title = "{TritonSort}: a Balanced and Energy-Efficient
Large-Scale Sorting System",
journal = j-TOCS,
volume = "31",
number = "1",
pages = "3:1--3:??",
month = feb,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2427631.2427634",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Feb 23 06:37:57 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "We present TritonSort, a highly efficient, scalable
sorting system. It is designed to process large
datasets, and has been evaluated against as much as
100TB of input data spread across 832 disks in 52 nodes
at a rate of 0.938TB/min. When evaluated against the
annual Indy GraySort sorting benchmark, TritonSort is
66\% better in absolute performance and has over six
times the per-node throughput of the previous record
holder. When evaluated against the 100TB Indy JouleSort
benchmark, TritonSort sorted 9703 records/Joule. In
this article, we describe the hardware and software
architecture necessary to operate TritonSort at this
level of efficiency. Through careful management of
system resources to ensure cross-resource balance, we
are able to sort data at approximately 80\% of the
disks' aggregate sequential write speed. We believe the
work holds a number of lessons for balanced system
design and for scale-out architectures in general.
While many interesting systems are able to scale
linearly with additional servers, per-server
performance can lag behind per-server capacity by more
than an order of magnitude. Bridging the gap between
high scalability and high performance would enable
either significantly less expensive systems that are
able to do the same work or provide the ability to
address significantly larger problem sets with the same
infrastructure.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Jayaram:2013:PCB,
author = "K. R. Jayaram and Patrick Eugster and Chamikara
Jayalath",
title = "Parametric Content-Based Publish\slash Subscribe",
journal = j-TOCS,
volume = "31",
number = "2",
pages = "4:1--4:??",
month = may,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2465346.2465347",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Jun 1 11:24:04 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Content-based publish/subscribe (CPS) is an appealing
abstraction for building scalable distributed systems,
e.g., message boards, intrusion detectors, or
algorithmic stock trading platforms. Recently, CPS
extensions have been proposed for location-based
services like vehicular networks, mobile social
networking, and so on. Although current CPS middleware
systems are dynamic in the way they support the joining
and leaving of publishers and subscribers, they fall
short in supporting subscription adaptations. These are
becoming increasingly important across many CPS
applications. In algorithmic high frequency trading,
for instance, stock price thresholds that are of
interest to a trader change rapidly, and gains directly
hinge on the reaction time to relevant fluctuations
rather than fixed values. In location-aware
applications, a subscription is a function of the
subscriber location (e.g., GPS coordinates), which
inherently changes during motion. The common solution
for adapting a subscription consists of a
resubscription, where a new subscription is issued and
the superseded one canceled. This incurs substantial
overhead in CPS middleware systems, and leads to missed
or duplicated events during the transition. In this
article, we explore the concept of parametric
subscriptions for capturing subscription adaptations.
We discuss desirable and feasible guarantees for
corresponding support, and propose novel algorithms for
updating routing mechanisms effectively and efficiently
in classic decentralized CPS broker overlay networks.
Compared to resubscriptions, our algorithms
significantly improve the reaction time to subscription
updates without hampering throughput or latency under
high update rates. We also propose and evaluate
approximation techniques to detect and mitigate
pathological cases of high frequency subscription
oscillations, which could significantly decrease the
throughput of CPS systems thereby affecting other
subscribers. We analyze the benefits of our support
through implementations of our algorithms in two CPS
systems, and by evaluating our algorithms on two
different application scenarios.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Smaldone:2013:OSP,
author = "Stephen Smaldone and Benjamin Gilbert and Jan Harkes
and Liviu Iftode and Mahadev Satyanarayanan",
title = "Optimizing Storage Performance for {VM}-Based Mobile
Computing",
journal = j-TOCS,
volume = "31",
number = "2",
pages = "5:1--5:??",
month = may,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2465346.2465348",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Jun 1 11:24:04 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "This article investigates the transient use of free
local storage for improving performance in VM-based
mobile computing systems implemented as thick clients
on host PCs. We use the term TransientPC systems to
refer to these types of systems. The solution we
propose, called TransPart, uses the higher-performing
local storage of host hardware to speed up
performance-critical operations. Our solution
constructs a virtual storage device on demand (which we
call transient storage) by borrowing free disk blocks
from the host's storage. In this article, we present
the design, implementation, and evaluation of a
TransPart prototype, which requires no modifications to
the software or hardware of a host computer.
Experimental results confirm that TransPart offers low
overhead and startup cost, while improving user
experience.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Lee:2013:ETB,
author = "Yunsup Lee and Rimas Avizienis and Alex Bishara and
Richard Xia and Derek Lockhart and Christopher Batten
and Krste Asanovi{\'c}",
title = "Exploring the Tradeoffs between Programmability and
Efficiency in Data-Parallel Accelerators",
journal = j-TOCS,
volume = "31",
number = "3",
pages = "6:1--6:??",
month = aug,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2491464",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Aug 28 17:03:36 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "We present a taxonomy and modular implementation
approach for data-parallel accelerators, including the
MIMD, vector-SIMD, subword-SIMD, SIMT, and
vector-thread (VT) architectural design patterns. We
introduce Maven, a new VT microarchitecture based on
the traditional vector-SIMD microarchitecture, that is
considerably simpler to implement and easier to program
than previous VT designs. Using an extensive
design-space exploration of full VLSI implementations
of many accelerator design points, we evaluate the
varying tradeoffs between programmability and
implementation efficiency among the MIMD, vector-SIMD,
and VT patterns on a workload of compiled
microbenchmarks and application kernels. We find the
vector cores provide greater efficiency than the MIMD
cores, even on fairly irregular kernels. Our results
suggest that the Maven VT microarchitecture is superior
to the traditional vector-SIMD architecture, providing
both greater efficiency and easier programmability.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Gamage:2013:PRO,
author = "Sahan Gamage and Ramana Rao Kompella and Dongyan Xu
and Ardalan Kangarlou",
title = "Protocol Responsibility Offloading to Improve {TCP}
Throughput in Virtualized Environments",
journal = j-TOCS,
volume = "31",
number = "3",
pages = "7:1--7:??",
month = aug,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2491463",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Aug 28 17:03:36 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Virtualization is a key technology that powers cloud
computing platforms such as Amazon EC2. Virtual machine
(VM) consolidation, where multiple VMs share a physical
host, has seen rapid adoption in practice, with
increasingly large numbers of VMs per machine and per
CPU core. Our investigations, however, suggest that the
increasing degree of VM consolidation has serious
negative effects on the VMs' TCP performance. As
multiple VMs share a given CPU, the scheduling
latencies, which can be in the order of tens of
milliseconds, substantially increase the typically
submillisecond round-trip times (RTTs) for TCP
connections in a datacenter, causing significant
degradation in throughput. In this article, we propose
a lightweight solution, called vPRO, that (a) offloads
the VM's TCP congestion control function to the driver
domain to improve TCP transmit performance; and (b)
offloads TCP acknowledgment functionality to the driver
domain to improve the TCP receive performance. Our
evaluation of a vPRO prototype on Xen suggests that
vPRO substantially improves TCP receive and transmit
throughputs with minimal per-packet CPU overhead. We
further show that the higher TCP throughput leads to
improvement in application-level performance, via
experiments with Apache Olio, a Web 2.0 cloud
application, and Intel MPI benchmark.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Corbett:2013:SGG,
author = "James C. Corbett and Jeffrey Dean and Michael Epstein
and Andrew Fikes and Christopher Frost and J. J. Furman
and Sanjay Ghemawat and Andrey Gubarev and Christopher
Heiser and Peter Hochschild and Wilson Hsieh and
Sebastian Kanthak and Eugene Kogan and Hongyi Li and
Alexander Lloyd and Sergey Melnik and David Mwaura and
David Nagle and Sean Quinlan and Rajesh Rao and Lindsay
Rolig and Yasushi Saito and Michal Szymaniak and
Christopher Taylor and Ruth Wang and Dale Woodford",
title = "{Spanner}: {Google}'s Globally Distributed Database",
journal = j-TOCS,
volume = "31",
number = "3",
pages = "8:1--8:??",
month = aug,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2491245",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Aug 28 17:03:36 MDT 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Spanner is Google's scalable, multiversion, globally
distributed, and synchronously replicated database. It
is the first system to distribute data at global scale
and support externally-consistent distributed
transactions. This article describes how Spanner is
structured, its feature set, the rationale underlying
various design decisions, and a novel time API that
exposes clock uncertainty. This API and its
implementation are critical to supporting external
consistency and a variety of powerful features:
nonblocking reads in the past, lock-free snapshot
transactions, and atomic schema changes, across all of
Spanner.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Mowry:2013:E,
author = "Todd C. Mowry",
title = "Editorial",
journal = j-TOCS,
volume = "31",
number = "4",
pages = "9:1--9:??",
month = dec,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2542150.2542151",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Dec 17 17:17:06 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Balakrishnan:2013:CDS,
author = "Mahesh Balakrishnan and Dahlia Malkhi and John D.
Davis and Vijayan Prabhakaran and Michael Wei and Ted
Wobber",
title = "{CORFU}: a distributed shared log",
journal = j-TOCS,
volume = "31",
number = "4",
pages = "10:1--10:??",
month = dec,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2535930",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Dec 17 17:17:06 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "CORFU is a global log which clients can append-to and
read-from over a network. Internally, CORFU is
distributed over a cluster of machines in such a way
that there is no single I/O bottleneck to either
appends or reads. Data is fully replicated for fault
tolerance, and a modest cluster of about 16--32
machines with SSD drives can sustain 1 million 4-KByte
operations per second. The CORFU log enabled the
construction of a variety of distributed applications
that require strong consistency at high speeds, such as
databases, transactional key-value stores, replicated
state machines, and metadata services.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Bojnordi:2013:PMC,
author = "Mahdi Nazm Bojnordi and Engin Ipek",
title = "A programmable memory controller for the {DDRx}
interfacing standards",
journal = j-TOCS,
volume = "31",
number = "4",
pages = "11:1--11:??",
month = dec,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2534845",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Dec 17 17:17:06 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Modern memory controllers employ sophisticated address
mapping, command scheduling, and power management
optimizations to alleviate the adverse effects of DRAM
timing and resource constraints on system performance.
A promising way of improving the versatility and
efficiency of these controllers is to make them
programmable --- a proven technique that has seen wide use
in other control tasks, ranging from DMA scheduling to
NAND Flash and directory control. Unfortunately, the
stringent latency and throughput requirements of modern
DDRx devices have rendered such programmability largely
impractical, confining DDRx controllers to
fixed-function hardware. This article presents the
instruction set architecture (ISA) and hardware
implementation of PARDIS, a programmable memory
controller that can meet the performance requirements
of a high-speed DDRx interface. The proposed controller
is evaluated by mapping previously proposed DRAM
scheduling, address mapping, refresh scheduling, and
power management algorithms onto PARDIS. Simulation
results show that the average performance of PARDIS
comes within 8\% of fixed-function hardware for each of
these techniques; moreover, by enabling
application-specific optimizations, PARDIS improves
system performance by 6 to 17\% and reduces DRAM energy
by 9 to 22\% over four existing memory controllers.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Delimitrou:2013:QAS,
author = "Christina Delimitrou and Christos Kozyrakis",
title = "{QoS}-Aware scheduling in heterogeneous datacenters
with {Paragon}",
journal = j-TOCS,
volume = "31",
number = "4",
pages = "12:1--12:??",
month = dec,
year = "2013",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2556583",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Dec 17 17:17:06 MST 2013",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Large-scale datacenters (DCs) host tens of thousands
of diverse applications each day. However, interference
between colocated workloads and the difficulty of
matching applications to one of the many hardware
platforms available can degrade performance, violating
the quality of service (QoS) guarantees that many cloud
workloads require. While previous work has identified
the impact of heterogeneity and interference, existing
solutions are computationally intensive, cannot be
applied online, and do not scale beyond a few
applications. We present Paragon, an online and
scalable DC scheduler that is heterogeneity- and
interference-aware. Paragon is derived from robust
analytical methods, and instead of profiling each
application in detail, it leverages information the
system already has about applications it has previously
seen. It uses collaborative filtering techniques to
quickly and accurately classify an unknown incoming
workload with respect to heterogeneity and interference
in multiple shared resources. It does so by identifying
similarities to previously scheduled applications. The
classification allows Paragon to greedily schedule
applications in a manner that minimizes interference
and maximizes server utilization. After the initial
application placement, Paragon monitors application
behavior and adjusts the scheduling decisions at
runtime to avoid performance degradations.
Additionally, we design ARQ, a multiclass admission
control protocol that constrains application waiting
time. ARQ queues applications in separate classes based
on the type of resources they need and avoids long
queueing delays for easy-to-satisfy workloads in
highly-loaded scenarios. Paragon scales to tens of
thousands of servers and applications with marginal
scheduling overheads in terms of time or state. We
evaluate Paragon with a wide range of workload
scenarios, on both small and large-scale systems,
including 1,000 servers on EC2. For a 2,500-workload
scenario, Paragon enforces performance guarantees for
91\% of applications, while significantly improving
utilization. In comparison, heterogeneity-oblivious,
interference-oblivious, and least-loaded schedulers
only provide similar guarantees for 14\%, 11\%, and 3\%
of workloads. The differences are more striking in
oversubscribed scenarios where resource efficiency is
more critical.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Silberstein:2014:GIF,
author = "Mark Silberstein and Bryan Ford and Idit Keidar and
Emmett Witchel",
title = "{GPUfs}: Integrating a file system with {GPUs}",
journal = j-TOCS,
volume = "32",
number = "1",
pages = "1:1--1:??",
month = feb,
year = "2014",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2553081",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Feb 27 12:15:46 MST 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "As GPU hardware becomes increasingly general-purpose,
it is quickly outgrowing the traditional, constrained
GPU-as-coprocessor programming model. This article
advocates for extending standard operating system
services and abstractions to GPUs in order to
facilitate program development and enable harmonious
integration of GPUs in computing systems. As an
example, we describe the design and implementation of
GPUfs, a software layer which provides operating system
support for accessing host files directly from GPU
programs. GPUfs provides a POSIX-like API, exploits GPU
parallelism for efficiency, and optimizes GPU file
access by extending the host CPU's buffer cache into
GPU memory. Our experiments, based on a set of real
benchmarks adapted to use our file system, demonstrate
the feasibility and benefits of the GPUfs approach. For
example, a self-contained GPU program that searches for
a set of strings throughout the Linux kernel source
tree runs over seven times faster than on an eight-core
CPU.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Klein:2014:CFV,
author = "Gerwin Klein and June Andronick and Kevin Elphinstone
and Toby Murray and Thomas Sewell and Rafal Kolanski
and Gernot Heiser",
title = "Comprehensive formal verification of an {OS}
microkernel",
journal = j-TOCS,
volume = "32",
number = "1",
pages = "2:1--2:??",
month = feb,
year = "2014",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2560537",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Feb 27 12:15:46 MST 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "We present an in-depth coverage of the comprehensive
machine-checked formal verification of seL4, a
general-purpose operating system microkernel. We
discuss the kernel design we used to make its
verification tractable. We then describe the functional
correctness proof of the kernel's C implementation and
we cover further steps that transform this result into
a comprehensive formal verification of the kernel: a
formally verified IPC fastpath, a proof that the binary
code of the kernel correctly implements the C
semantics, a proof of correct access-control
enforcement, a proof of information-flow
noninterference, a sound worst-case execution time
analysis of the binary, and an automatic initialiser
for user-level systems that connects kernel-level
access-control enforcement with reasoning about system
behaviour. We summarise these results and show how they
integrate to form a coherent overall analysis, backed
by machine-checked, end-to-end theorems. The seL4
microkernel is currently not just the only
general-purpose operating system kernel that is fully
formally verified to this degree. It is also the only
example of formal proof of this scale that is kept
current as the requirements, design and implementation
of the system evolve over almost a decade. We report on
our experience in maintaining this evolving formally
verified code base.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Guevara:2014:MMM,
author = "Marisabel Guevara and Benjamin Lubin and Benjamin C.
Lee",
title = "Market mechanisms for managing datacenters with
heterogeneous microarchitectures",
journal = j-TOCS,
volume = "32",
number = "1",
pages = "3:1--3:??",
month = feb,
year = "2014",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2541258",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Feb 27 12:15:46 MST 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Specialization of datacenter resources brings
performance and energy improvements in response to the
growing scale and diversity of cloud applications. Yet
heterogeneous hardware adds complexity and volatility
to latency-sensitive applications. A resource
allocation mechanism that leverages architectural
principles can overcome both of these obstacles. We
integrate research in heterogeneous architectures with
recent advances in multi-agent systems. Embedding
architectural insight into proxies that bid on behalf
of applications, a market effectively allocates
hardware to applications with diverse preferences and
valuations. Exploring a space of heterogeneous
datacenter configurations, which mix server-class Xeon
and mobile-class Atom processors, we find an optimal
heterogeneous balance that improves both welfare and
energy-efficiency. We further design and evaluate
twelve design points along the Xeon-to-Atom spectrum,
and find that a mix of three processor architectures
achieves a $ 12 \times $ reduction in response time
violations relative to equal-power homogeneous
systems.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Palix:2014:FL,
author = "Nicolas Palix and Gael Thomas and Suman Saha and
Christophe Calv{\`e}s and Gilles Muller and Julia
Lawall",
title = "Faults in {Linux 2.6}",
journal = j-TOCS,
volume = "32",
number = "2",
pages = "4:1--4:??",
month = jun,
year = "2014",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2619090",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jul 7 16:54:52 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/linux.bib;
https://www.math.utah.edu/pub/tex/bib/tocs.bib;
https://www.math.utah.edu/pub/tex/bib/unix.bib",
abstract = "In August 2011, Linux entered its third decade. Ten
years before, Chou et al. published a study of faults
found by applying a static analyzer to Linux versions
1.0 through 2.4.1. A major result of their work was
that the drivers directory contained up to 7 times more
of certain kinds of faults than other directories. This
result inspired numerous efforts on improving the
reliability of driver code. Today, Linux is used in a
wider range of environments, provides a wider range of
services, and has adopted a new development and release
model. What has been the impact of these changes on
code quality? To answer this question, we have
transported Chou et al.'s experiments to all versions
of Linux 2.6 released between 2003 and 2011. We find
that Linux has more than doubled in size during this
period, but the number of faults per line of code has
been decreasing. Moreover, the fault rate of drivers is
now below that of other directories, such as arch.
These results can guide further development and
research efforts for the decade to come. To allow
updating these results as Linux evolves, we define our
experimental protocol and make our checkers
available.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Enck:2014:TIF,
author = "William Enck and Peter Gilbert and Seungyeop Han and
Vasant Tendulkar and Byung-Gon Chun and Landon P. Cox
and Jaeyeon Jung and Patrick McDaniel and Anmol N.
Sheth",
title = "{TaintDroid}: an Information-Flow Tracking System for
Realtime Privacy Monitoring on {Smartphones}",
journal = j-TOCS,
volume = "32",
number = "2",
pages = "5:1--5:??",
month = jun,
year = "2014",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2619091",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jul 7 16:54:52 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Today's smartphone operating systems frequently fail
to provide users with visibility into how third-party
applications collect and share their private data. We
address these shortcomings with TaintDroid, an
efficient, system-wide dynamic taint tracking and
analysis system capable of simultaneously tracking
multiple sources of sensitive data. TaintDroid enables
realtime analysis by leveraging Android's virtualized
execution environment. TaintDroid incurs only 32\%
performance overhead on a CPU-bound microbenchmark and
imposes negligible overhead on interactive third-party
applications. Using TaintDroid to monitor the behavior
of 30 popular third-party Android applications, in our
2010 study we found 20 applications potentially misused
users' private information; so did a similar fraction
of the tested applications in our 2012 study.
Monitoring the flow of privacy-sensitive data with
TaintDroid provides valuable input for smartphone users
and security service firms seeking to identify
misbehaving applications.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Yu:2014:OBS,
author = "Young Jin Yu and Dong In Shin and Woong Shin and Nae
Young Song and Jae Woo Choi and Hyeong Seog Kim and
Hyeonsang Eom and Heon Young Yeom",
title = "Optimizing the Block {I/O} Subsystem for Fast Storage
Devices",
journal = j-TOCS,
volume = "32",
number = "2",
pages = "6:1--6:??",
month = jun,
year = "2014",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2619092",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jul 7 16:54:52 MDT 2014",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Fast storage devices are an emerging solution to
satisfy data-intensive applications. They provide high
transaction rates for DBMS, low response times for Web
servers, instant on-demand paging for applications with
large memory footprints, and many similar advantages
for performance-hungry applications. In spite of the
benefits promised by fast hardware, modern operating
systems are not yet structured to take advantage of the
hardware's full potential. The software overhead caused
by an OS, negligible in the past, adversely impacts
application performance, lessening the advantage of
using such hardware. Our analysis demonstrates that the
overheads from the traditional storage-stack design are
significant and cannot easily be overcome without
modifying the hardware interface and adding new
capabilities to the operating system. In this article,
we propose six optimizations that enable an OS to fully
exploit the performance characteristics of fast storage
devices. With the support of new hardware interfaces,
our optimizations minimize per-request latency by
streamlining the I/O path and amortize per-request
latency by maximizing parallelism inside the device. We
demonstrate the impact on application performance
through well-known storage benchmarks run against a
Linux kernel with a customized SSD. We find that
eliminating context switches in the I/O path decreases
the software overhead of an I/O request from 20
microseconds to 5 microseconds and a new request merge
scheme called Temporal Merge enables the OS to achieve
87\% to 100\% of peak device performance, regardless of
request access patterns or types. Although the
performance improvement by these optimizations on a
standard SATA-based SSD is marginal (because of its
limited interface and relatively high response times),
our sensitivity analysis suggests that future SSDs with
lower response times will benefit from these changes.
The effectiveness of our optimizations encourages
discussion between the OS community and storage vendors
about future device interfaces for fast storage
devices.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Samadi:2014:SPS,
author = "Mehrzad Samadi and Janghaeng Lee and D. Anoushe
Jamshidi and Scott Mahlke and Amir Hormati",
title = "Scaling Performance via Self-Tuning Approximation for
Graphics Engines",
journal = j-TOCS,
volume = "32",
number = "3",
pages = "7:1--7:??",
month = sep,
year = "2014",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2631913",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 21 07:18:28 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Approximate computing, where computation accuracy is
traded off for better performance or higher data
throughput, is one solution that can help data
processing keep pace with the current and growing
abundance of information. For particular domains, such
as multimedia and learning algorithms, approximation is
commonly used today. We consider automation to be
essential to provide transparent approximation, and we
show that larger benefits can be achieved by
constructing the approximation techniques to fit the
underlying hardware. Our target platform is the GPU
because of its high performance capabilities and
difficult programming challenges that can be alleviated
with proper automation. Our approach --- SAGE ---
combines a static compiler that automatically generates
a set of CUDA kernels with varying levels of
approximation with a runtime system that iteratively
selects among the available kernels to achieve speedup
while adhering to a target output quality set by the
user. The SAGE compiler employs three optimization
techniques to generate approximate kernels that exploit
the GPU microarchitecture: selective discarding of
atomic operations, data packing, and thread fusion.
Across a set of machine learning and image processing
kernels, SAGE's approximation yields an average of 2.5$
\times $ speedup with less than 10\% quality loss
compared to the accurate execution on a NVIDIA GTX 560
GPU.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Wu:2014:EAH,
author = "Lisa Wu and Orestis Polychroniou and Raymond J. Barker
and Martha A. Kim and Kenneth A. Ross",
title = "Energy Analysis of Hardware and Software Range
Partitioning",
journal = j-TOCS,
volume = "32",
number = "3",
pages = "8:1--8:??",
month = sep,
year = "2014",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2638550",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 21 07:18:28 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Data partitioning is a critical operation for
manipulating large datasets because it subdivides tasks
into pieces that are more amenable to efficient
processing. It is often the limiting factor in database
performance and represents a significant fraction of
the overall runtime of large data queries. This article
measures the performance and energy of state-of-the-art
software partitioners, and describes and evaluates a
hardware range partitioner that further improves
efficiency. The software implementation is broken into
two phases, allowing separate analysis of the partition
function computation and data shuffling costs. Although
range partitioning is commonly thought to be more
expensive than simpler strategies such as hash
partitioning, our measurements indicate that careful
data movement and optimization of the partition
function can allow it to approach the throughput and
energy consumption of hash or radix partitioning. For
further acceleration, we describe a hardware range
partitioner, or HARP, a streaming framework that offers
a seamless execution environment for this and other
streaming accelerators, and a detailed analysis of a
32nm physical design that matches the throughput of
four to eight software threads while consuming just
6.9\% of the area and 4.3\% of the power of a Xeon core
in the same technology generation.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Sampson:2014:ASS,
author = "Adrian Sampson and Jacob Nelson and Karin Strauss and
Luis Ceze",
title = "Approximate Storage in Solid-State Memories",
journal = j-TOCS,
volume = "32",
number = "3",
pages = "9:1--9:??",
month = sep,
year = "2014",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2644808",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 21 07:18:28 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Memories today expose an all-or-nothing correctness
model that incurs significant costs in performance,
energy, area, and design complexity. But not all
applications need high-precision storage for all of
their data structures all of the time. This article
proposes mechanisms that enable applications to store
data approximately and shows that doing so can improve
the performance, lifetime, or density of solid-state
memories. We propose two mechanisms. The first allows
errors in multilevel cells by reducing the number of
programming pulses used to write them. The second
mechanism mitigates wear-out failures and extends
memory endurance by mapping approximate data onto
blocks that have exhausted their hardware error
correction resources. Simulations show that
reduced-precision writes in multilevel phase-change
memory cells can be 1.7 $ \times $ faster on average
and using failed blocks can improve array lifetime by
23\% on average with quality loss under 10\%.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Clements:2015:SCR,
author = "Austin T. Clements and M. Frans Kaashoek and Nickolai
Zeldovich and Robert T. Morris and Eddie Kohler",
title = "The Scalable Commutativity Rule: Designing Scalable
Software for Multicore Processors",
journal = j-TOCS,
volume = "32",
number = "4",
pages = "10:1--10:??",
month = jan,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2699681",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 21 07:18:30 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "What opportunities for multicore scalability are
latent in software interfaces, such as system call
APIs? Can scalability challenges and opportunities be
identified even before any implementation exists,
simply by considering interface specifications? To
answer these questions, we introduce the scalable
commutativity rule: whenever interface operations
commute, they can be implemented in a way that scales.
This rule is useful throughout the development process
for scalable multicore software, from the interface
design through implementation, testing, and evaluation.
This article formalizes the scalable commutativity
rule. This requires defining a novel form of
commutativity, SIM commutativity, that lets the rule
apply even to complex and highly stateful software
interfaces. We also introduce a suite of software
development tools based on the rule. Our Commuter tool
accepts high-level interface models, generates tests of
interface operations that commute and hence could
scale, and uses these tests to systematically evaluate
the scalability of implementations. We apply Commuter
to a model of 18 POSIX file and virtual memory system
operations. Using the resulting 26,238 scalability
tests, Commuter highlights Linux kernel problems
previously observed to limit application scalability
and identifies previously unknown bottlenecks that may
be triggered by future workloads or hardware. Finally,
we apply the scalable commutativity rule and Commuter
to the design and implementation of sv6, a new POSIX-like
operating system. sv6's novel file and virtual memory
system designs enable it to scale for 99\% of the tests
generated by Commuter. These results translate to
linear scalability on an 80-core x86 machine for
applications built on sv6's commutative operations.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Nair:2015:MMA,
author = "Arun Arvind Nair and Stijn Eyerman and Jian Chen and
Lizy Kurian John and Lieven Eeckhout",
title = "Mechanistic Modeling of Architectural Vulnerability
Factor",
journal = j-TOCS,
volume = "32",
number = "4",
pages = "11:1--11:??",
month = jan,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2669364",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 21 07:18:30 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Reliability to soft errors is a significant design
challenge in modern microprocessors owing to an
exponential increase in the number of transistors on
chip and the reduction in operating voltages with each
process generation. Architectural Vulnerability Factor
(AVF) modeling using microarchitectural simulators
enables architects to make informed performance, power,
and reliability tradeoffs. However, such simulators are
time-consuming and do not reveal the microarchitectural
mechanisms that influence AVF. In this article, we
present an accurate first-order mechanistic analytical
model to compute AVF, developed using the first
principles of an out-of-order superscalar execution.
This model provides insight into the fundamental
interactions between the workload and microarchitecture
that together influence AVF. We use the model to
perform design space exploration, parametric sweeps,
and workload characterization for AVF.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Aublin:2015:NBP,
author = "Pierre-Louis Aublin and Rachid Guerraoui and Nikola
Knezevi{\'c} and Vivien Qu{\'e}ma and Marko
Vukoli{\'c}",
title = "The Next 700 {BFT} Protocols",
journal = j-TOCS,
volume = "32",
number = "4",
pages = "12:1--12:??",
month = jan,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2658994",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 21 07:18:30 MST 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "We present Abstract (ABortable STate mAChine
replicaTion), a new abstraction for designing and
reconfiguring generalized replicated state machines
that are, unlike traditional state machines, allowed to
abort executing a client's request if ``something goes
wrong.'' Abstract can be used to considerably simplify
the incremental development of efficient Byzantine
fault-tolerant state machine replication (BFT)
protocols that are notorious for being difficult to
develop. In short, we treat a BFT protocol as a
composition of Abstract instances. Each instance is
developed and analyzed independently and optimized for
specific system conditions. We illustrate the power of
Abstract through several interesting examples. We first
show how Abstract can yield benefits of a
state-of-the-art BFT protocol in a less painful and
error-prone manner. Namely, we develop AZyzzyva, a new
protocol that mimics the celebrated best-case behavior
of Zyzzyva using less than 35\% of the Zyzzyva code. To
cover worst-case situations, our abstraction enables
one to use in AZyzzyva any existing BFT protocol. We
then present Aliph, a new BFT protocol that outperforms
previous BFT protocols in terms of both latency (by up
to 360\%) and throughput (by up to 30\%). Finally, we
present R-Aliph, an implementation of Aliph that is
robust, that is, whose performance degrades gracefully
in the presence of Byzantine replicas and Byzantine
clients.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Wang:2015:DAU,
author = "Xi Wang and Nickolai Zeldovich and M. Frans Kaashoek
and Armando Solar-Lezama",
title = "A Differential Approach to Undefined Behavior
Detection",
journal = j-TOCS,
volume = "33",
number = "1",
pages = "1:1--1:??",
month = mar,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2699678",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Fri Mar 13 07:03:25 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "This article studies undefined behavior arising in
systems programming languages such as C/C++. Undefined
behavior bugs lead to unpredictable and subtle systems
behavior, and their effects can be further amplified by
compiler optimizations. Undefined behavior bugs are
present in many systems, including the Linux kernel and
the Postgres database. The consequences range from
incorrect functionality to missing security checks.
This article proposes a formal and practical approach
that finds undefined behavior bugs by finding
``unstable code'' in terms of optimizations that
leverage undefined behavior. Using this approach, we
introduce a new static checker called Stack that
precisely identifies undefined behavior bugs. Applying
Stack to widely used systems has uncovered 161 new bugs
that have been confirmed and fixed by developers.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Bila:2015:EOP,
author = "Nilton Bila and Eric J. Wright and Eyal {De Lara} and
Kaustubh Joshi and H. Andr{\'e}s Lagar-Cavilla and
Eunbyung Park and Ashvin Goel and Matti Hiltunen and
Mahadev Satyanarayanan",
title = "Energy-Oriented Partial Desktop Virtual Machine
Migration",
journal = j-TOCS,
volume = "33",
number = "1",
pages = "2:1--2:??",
month = mar,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2699683",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Fri Mar 13 07:03:25 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Modern offices are crowded with personal computers.
While studies have shown these to be idle most of the
time, they remain powered, consuming up to 60\% of
their peak power. Hardware-based solutions engendered
by PC vendors (e.g., low-power states, Wake-on-LAN)
have proved unsuccessful because, in spite of user
inactivity, these machines often need to remain network
active in support of background applications that
maintain network presence. Recent proposals have
advocated the use of consolidation of idle desktop
Virtual Machines (VMs). However, desktop VMs are often
large, requiring gigabytes of memory. Consolidating
such VMs creates large network transfers lasting in the
order of minutes and utilizes server memory
inefficiently. When multiple VMs migrate concurrently,
networks become congested, and the resulting migration
latencies are prohibitive. We present partial VM
migration, an approach that transparently migrates only
the working set of an idle VM. It creates a partial
replica of the desktop VM on the consolidation server
by copying only VM metadata, and it transfers pages to
the server on-demand, as the VM accesses them. This
approach places desktop PCs in low-power mode when
inactive and switches them to running mode when pages
are needed by the VM running on the consolidation
server. To ensure that desktops save energy, we have
developed sleep scheduling and prefetching algorithms,
as well as the context-aware selective resume
framework, a novel approach to reduce the latency of
power mode transition operations in commodity PCs.
Jettison, our software prototype of partial VM
migration for off-the-shelf PCs, can deliver 44--91\%
energy savings during idle periods of at least 10
minutes, while providing low migration latencies of
about 4 seconds and migrating minimal state that is
under an order of magnitude of the VM's memory
footprint.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Blem:2015:IWU,
author = "Emily Blem and Jaikrishnan Menon and Thiruvengadam
Vijayaraghavan and Karthikeyan Sankaralingam",
title = "{ISA} Wars: Understanding the Relevance of {ISA} being
{RISC} or {CISC} to Performance, Power, and Energy on
Modern Architectures",
journal = j-TOCS,
volume = "33",
number = "1",
pages = "3:1--3:??",
month = mar,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2699682",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Fri Mar 13 07:03:25 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "RISC versus CISC wars raged in the 1980s when chip
area and processor design complexity were the primary
constraints and desktops and servers exclusively
dominated the computing landscape. Today, energy and
power are the primary design constraints and the
computing landscape is significantly different: Growth
in tablets and smartphones running ARM (a RISC ISA) is
surpassing that of desktops and laptops running x86 (a
CISC ISA). Furthermore, the traditionally low-power ARM
ISA is entering the high-performance server market,
while the traditionally high-performance x86 ISA is
entering the mobile low-power device market. Thus, the
question of whether ISA plays an intrinsic role in
performance or energy efficiency is becoming important
again, and we seek to answer this question through a
detailed measurement-based study on real hardware
running real applications. We analyze measurements on
seven platforms spanning three ISAs (MIPS, ARM, and
x86) over workloads spanning mobile, desktop, and
server computing. Our methodical investigation
demonstrates the role of ISA in modern microprocessors'
performance and energy efficiency. We find that ARM,
MIPS, and x86 processors are simply engineering design
points optimized for different levels of performance,
and there is nothing fundamentally more energy
efficient in one ISA class or the other. The ISA being
RISC or CISC seems irrelevant.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Lin:2015:KMO,
author = "Felix Xiaozhu Lin and Zhen Wang and Lin Zhong",
title = "{K2}: a Mobile Operating System for Heterogeneous
Coherence Domains",
journal = j-TOCS,
volume = "33",
number = "2",
pages = "4:1--4:??",
month = jun,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2699676",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jun 10 11:00:03 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Mobile System-on-Chips (SoC) that incorporate
heterogeneous coherence domains promise high energy
efficiency to a wide range of mobile applications, yet
are difficult to program. To exploit the architecture,
a desirable, yet missing capability is to replicate
operating system (OS) services over multiple coherence
domains with minimum inter-domain communication. In
designing such an OS, we set three goals: to ease
application development, to simplify OS engineering,
and to preserve the current OS performance. To this
end, we identify a shared-most OS model for multiple
coherence domains: creating per-domain instances of
core OS services with no shared state, while enabling
other extended OS services to share state across
domains. To test the model, we build K2, a prototype OS
on the TI OMAP4 SoC, by reusing most of the Linux 3.4
source. K2 presents a single system image to
applications with its two kernels running on top of the
two coherence domains of OMAP4. The two kernels have
independent instances of core OS services, such as page
allocation and interrupt management, as coordinated by
K2; the two kernels share most extended OS services,
such as device drivers, whose state is kept coherent
transparently by K2. Despite platform constraints and
unoptimized code, K2 improves energy efficiency for
light OS workloads by 8x--10x, while incurring less
than 9\% performance overhead for two device drivers
shared between kernels. Our experiences with K2 show
that the shared-most model is promising.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Johansen:2015:FSS,
author = "H{\aa}vard D. Johansen and Robbert {Van Renesse} and
Ymir Vigfusson and Dag Johansen",
title = "{Fireflies}: a Secure and Scalable Membership and
Gossip Service",
journal = j-TOCS,
volume = "33",
number = "2",
pages = "5:1--5:??",
month = jun,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2701418",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jun 10 11:00:03 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "An attacker who controls a computer in an overlay
network can effectively control the entire overlay
network if the mechanism managing membership
information can successfully be targeted. This article
describes Fireflies, an overlay network protocol that
fights such attacks by organizing members in a
verifiable pseudorandom structure so that an intruder
cannot incorrectly modify the membership views of
correct members. Fireflies provides each member with a
view of the entire membership, and supports networks
with moderate total churn. We evaluate Fireflies using
both simulations and PlanetLab to show that Fireflies
is a practical approach for secure membership
maintenance in such networks.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Chen:2015:SFA,
author = "Tianshi Chen and Shijin Zhang and Shaoli Liu and
Zidong Du and Tao Luo and Yuan Gao and Junjie Liu and
Dongsheng Wang and Chengyong Wu and Ninghui Sun and
Yunji Chen and Olivier Temam",
title = "A Small-Footprint Accelerator for Large-Scale Neural
Networks",
journal = j-TOCS,
volume = "33",
number = "2",
pages = "6:1--6:??",
month = jun,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2701417",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jun 10 11:00:03 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Machine-learning tasks are becoming pervasive in a
broad range of domains, and in a broad range of systems
(from embedded systems to data centers). At the same
time, a small set of machine-learning algorithms
(especially Convolutional and Deep Neural Networks,
i.e., CNNs and DNNs) are proving to be state-of-the-art
across many applications. As architectures evolve
toward heterogeneous multicores composed of a mix of
cores and accelerators, a machine-learning accelerator
can achieve the rare combination of efficiency (due to
the small number of target algorithms) and broad
application scope. Until now, most machine-learning
accelerator designs have been focusing on efficiently
implementing the computational part of the algorithms.
However, recent state-of-the-art CNNs and DNNs are
characterized by their large size. In this study, we
design an accelerator for large-scale CNNs and DNNs,
with a special emphasis on the impact of memory on
accelerator design, performance, and energy. We show
that it is possible to design an accelerator with a
high throughput, capable of performing 452 GOP/s (key
NN operations such as synaptic weight multiplications
and neurons outputs additions) in a small footprint of
3.02mm$^2$ and 485mW; compared to a 128-bit 2GHz SIMD
processor, the accelerator is $ 117.87 \times $ faster,
and it can reduce the total energy by $ 21.08 \times $.
The accelerator characteristics are obtained after
layout at 65nm. Such a high throughput in a small
footprint can open up the usage of state-of-the-art
machine-learning algorithms in a broad set of systems
and for a broad set of applications.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Ousterhout:2015:RSS,
author = "John Ousterhout and Arjun Gopalan and Ashish Gupta and
Ankita Kejriwal and Collin Lee and Behnam Montazeri and
Diego Ongaro and Seo Jin Park and Henry Qin and Mendel
Rosenblum and Stephen Rumble and Ryan Stutsman and
Stephen Yang",
title = "The {RAMCloud} Storage System",
journal = j-TOCS,
volume = "33",
number = "3",
pages = "7:1--7:??",
month = sep,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2806887",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Sep 14 10:11:30 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "RAMCloud is a storage system that provides low-latency
access to large-scale datasets. To achieve low latency,
RAMCloud stores all data in DRAM at all times. To
support large capacities (1PB or more), it aggregates
the memories of thousands of servers into a single
coherent key-value store. RAMCloud ensures the
durability of DRAM-based data by keeping backup copies
on secondary storage. It uses a uniform log-structured
mechanism to manage both DRAM and secondary storage,
which results in high performance and efficient memory
usage. RAMCloud uses a polling-based approach to
communication, bypassing the kernel to communicate
directly with NICs; with this approach, client
applications can read small objects from any RAMCloud
storage server in less than 5 $ \mu $s, and durable writes
of small objects take about 13.5 $ \mu $s. RAMCloud
does not keep multiple copies of data online; instead,
it provides high availability by recovering from
crashes very quickly (1 to 2 seconds). RAMCloud's crash
recovery mechanism harnesses the resources of the
entire cluster working concurrently so that recovery
performance scales with cluster size.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Baumann:2015:SAU,
author = "Andrew Baumann and Marcus Peinado and Galen Hunt",
title = "Shielding Applications from an Untrusted Cloud with
{Haven}",
journal = j-TOCS,
volume = "33",
number = "3",
pages = "8:1--8:??",
month = sep,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2799647",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Sep 14 10:11:30 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Today's cloud computing infrastructure requires
substantial trust. Cloud users rely on both the
provider's staff and its globally distributed
software/hardware platform not to expose any of their
private data. We introduce the notion of shielded
execution, which protects the confidentiality and
integrity of a program and its data from the platform
on which it runs (i.e., the cloud operator's OS, VM,
and firmware). Our prototype, Haven, is the first
system to achieve shielded execution of unmodified
legacy applications, including SQL Server and Apache,
on a commodity OS (Windows) and commodity hardware.
Haven leverages the hardware protection of Intel SGX to
defend against privileged code and physical attacks
such as memory probes, and also addresses the dual
challenges of executing unmodified legacy binaries and
protecting them from a malicious host. This work
motivated recent changes in the SGX specification.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Lee:2015:SSK,
author = "Janghaeng Lee and Mehrzad Samadi and Yongjun Park and
Scott Mahlke",
title = "{SKMD}: Single Kernel on Multiple Devices for
Transparent {CPU--GPU} Collaboration",
journal = j-TOCS,
volume = "33",
number = "3",
pages = "9:1--9:??",
month = sep,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2798725",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Sep 14 10:11:30 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Heterogeneous computing on CPUs and GPUs has
traditionally used fixed roles for each device: the GPU
handles data parallel work by taking advantage of its
massive number of cores while the CPU handles non
data-parallel work, such as the sequential code or data
transfer management. This work distribution can be a
poor solution as it underutilizes the CPU, has
difficulty generalizing beyond the single CPU-GPU
combination, and may waste a large fraction of time
transferring data. Further, CPUs are performance
competitive with GPUs on many workloads, thus simply
partitioning work based on the fixed roles may be a
poor choice. In this article, we present the
single-kernel multiple devices (SKMD) system, a
framework that transparently orchestrates collaborative
execution of a single data-parallel kernel across
multiple asymmetric CPUs and GPUs. The programmer is
responsible for developing a single data-parallel
kernel in OpenCL, while the system automatically
partitions the workload across an arbitrary set of
devices, generates kernels to execute the partial
workloads, and efficiently merges the partial outputs
together. The goal is performance improvement by
maximally utilizing all available resources to execute
the kernel. SKMD handles the difficult challenges of
exposed data transfer costs and the performance
variations GPUs have with respect to input size. On
real hardware, SKMD achieves an average speedup of 28\%
on a system with one multicore CPU and two asymmetric
GPUs compared to a fastest device execution strategy
for a set of popular OpenCL kernels.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Pellauer:2015:ECC,
author = "Michael Pellauer and Angshuman Parashar and Michael
Adler and Bushra Ahsan and Randy Allmon and Neal Crago
and Kermin Fleming and Mohit Gambhir and Aamer Jaleel
and Tushar Krishna and Daniel Lustig and Stephen Maresh
and Vladimir Pavlov and Rachid Rayess and Antonia Zhai
and Joel Emer",
title = "Efficient Control and Communication Paradigms for
Coarse-Grained Spatial Architectures",
journal = j-TOCS,
volume = "33",
number = "3",
pages = "10:1--10:??",
month = sep,
year = "2015",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2754930",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Sep 14 10:11:30 MDT 2015",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "There has been recent interest in exploring the
acceleration of nonvectorizable workloads with
spatially programmed architectures that are designed to
efficiently exploit pipeline parallelism. Such an
architecture faces two main problems: how to
efficiently control each processing element (PE) in the
system, and how to facilitate inter-PE communication
without the overheads of traditional shared-memory
coherent memory. In this article, we explore solving
these problems using triggered instructions and
latency-insensitive channels. Triggered instructions
completely eliminate the program counter (PC) and allow
programs to transition concisely between states without
explicit branch instructions. Latency-insensitive
channels allow efficient communication of inter-PE
control information while simultaneously enabling
flexible code placement and improving tolerance for
variable events such as cache accesses. Together, these
approaches provide a unified mechanism to avoid
overserialized execution, essentially achieving the
effect of techniques such as dynamic instruction
reordering and multithreading. Our analysis shows that
a spatial accelerator using triggered instructions and
latency-insensitive channels can achieve 8 $ \times $
greater area-normalized performance than a traditional
general-purpose processor. Further analysis shows that
triggered control reduces the number of static and
dynamic instructions in the critical paths by 62\% and
64\%, respectively, over a PC-style baseline,
increasing the performance of the spatial programming
approach by 2.0 $ \times $.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Peter:2016:AOS,
author = "Simon Peter and Jialin Li and Irene Zhang and Dan R.
K. Ports and Doug Woos and Arvind Krishnamurthy and
Thomas Anderson and Timothy Roscoe",
title = "{Arrakis}: The Operating System Is the Control Plane",
journal = j-TOCS,
volume = "33",
number = "4",
pages = "11:1--11:??",
month = jan,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2812806",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 6 06:45:30 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Recent device hardware trends enable a new approach to
the design of network server operating systems. In a
traditional operating system, the kernel mediates
access to device hardware by server applications to
enforce process isolation as well as network and disk
security. We have designed and implemented a new
operating system, Arrakis, that splits the traditional
role of the kernel in two. Applications have direct
access to virtualized I/O devices, allowing most I/O
operations to skip the kernel entirely, while the
kernel is re-engineered to provide network and disk
protection without kernel mediation of every operation.
We describe the hardware and software changes needed to
take advantage of this new abstraction, and we
illustrate its power by showing improvements of 2 to 5
$ \times $ in latency and 9 $ \times $ throughput for a
popular persistent NoSQL store relative to a well-tuned
Linux implementation.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Kumar:2016:ASC,
author = "Rakesh Kumar and Alejandro Mart{\'\i}nez and Antonio
Gonz{\'a}lez",
title = "Assisting Static Compiler Vectorization with a
Speculative Dynamic Vectorizer in an {HW\slash SW}
Codesigned Environment",
journal = j-TOCS,
volume = "33",
number = "4",
pages = "12:1--12:??",
month = jan,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2807694",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 6 06:45:30 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Compiler-based static vectorization is used widely to
extract data-level parallelism from
computation-intensive applications. Static
vectorization is very effective in vectorizing
traditional array-based applications. However,
compilers' inability to do accurate interprocedural
pointer disambiguation and interprocedural array
dependence analysis severely limits vectorization
opportunities. HW/SW codesigned processors provide an
excellent opportunity to optimize the applications at
runtime. The availability of dynamic application
behavior at runtime helps in capturing vectorization
opportunities generally missed by the compilers. This
article proposes to complement the static vectorization
with a speculative dynamic vectorizer in an HW/SW
codesigned processor. We present a speculative dynamic
vectorization algorithm that speculatively reorders
ambiguous memory references to uncover vectorization
opportunities. The speculative reordering of memory
instructions avoids the need for accurate
interprocedural pointer disambiguation and
interprocedural array dependence analysis. The hardware
checks for any memory dependence violation due to
speculative vectorization and takes corrective action
in case of violation. Our experiments show that the
combined (static + dynamic) vectorization approach
provides a $ 2 \times $ performance benefit compared to
the static GCC vectorization alone, for SPECFP2006.
Furthermore, the speculative dynamic vectorizer is able
to vectorize 48\% of the loops that ICC failed to
vectorize due to conservative dependence analysis in
the TSVC benchmark suite. Moreover, the dynamic
vectorization scheme is as effective in vectorization
of pointer-based applications as for the array-based
ones, whereas compilers lose significant vectorization
opportunities in pointer-based applications.
Furthermore, we show that speculation is not only a
luxury but also a necessity for runtime
vectorization.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Lozi:2016:FPL,
author = "Jean-Pierre Lozi and Florian David and Ga{\"e}l Thomas
and Julia Lawall and Gilles Muller",
title = "Fast and Portable Locking for Multicore
Architectures",
journal = j-TOCS,
volume = "33",
number = "4",
pages = "13:1--13:??",
month = jan,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2845079",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Jan 6 06:45:30 MST 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/multithreading.bib;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "The scalability of multithreaded applications on
current multicore systems is hampered by the
performance of lock algorithms, due to the costs of
access contention and cache misses. The main
contribution presented in this article is a new locking
technique, Remote Core Locking (RCL), that aims to
accelerate the execution of critical sections in legacy
applications on multicore architectures. The idea of
RCL is to replace lock acquisitions by optimized remote
procedure calls to a dedicated server hardware thread.
RCL limits the performance collapse observed with other
lock algorithms when many threads try to acquire a lock
concurrently and removes the need to transfer
lock-protected shared data to the hardware thread
acquiring the lock, because such data can typically
remain in the server's cache. Other contributions
presented in this article include a profiler that
identifies the locks that are the bottlenecks in
multithreaded applications and that can thus benefit
from RCL, and a reengineering tool that transforms
POSIX lock acquisitions into RCL locks. Eighteen
applications were used to evaluate RCL: the nine
applications of the SPLASH-2 benchmark suite, the seven
applications of the Phoenix 2 benchmark suite,
Memcached, and Berkeley DB with a TPC-C client. Eight
of these applications are unable to scale because of
locks and benefit from RCL on an x86 machine with four
AMD Opteron processors and 48 hardware threads. By
using RCL instead of Linux POSIX locks, performance is
improved by up to 2.5 times on Memcached, and up to
11.6 times on Berkeley DB with the TPC-C client. On a
SPARC machine with two Sun UltraSPARC T2+ processors
and 128 hardware threads, three applications benefit
from RCL. In particular, performance is improved by up
to 1.3 times with respect to Solaris POSIX locks on
Memcached, and up to 7.9 times on Berkeley DB with the
TPC-C client.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Heiser:2016:LML,
author = "Gernot Heiser and Kevin Elphinstone",
title = "{L4} Microkernels: The Lessons from 20 Years of
Research and Deployment",
journal = j-TOCS,
volume = "34",
number = "1",
pages = "1:1--1:29",
month = apr,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2893177",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat May 21 08:09:53 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "The L4 microkernel has undergone 20 years of use and
evolution. It has an active user and developer
community, and there are commercial versions that are
deployed on a large scale and in safety-critical
systems. In this article we examine the lessons learnt
in those 20 years about microkernel design and
implementation. We revisit the L4 design articles and
examine the evolution of design and implementation from
the original L4 to the latest generation of L4 kernels.
We specifically look at seL4, which has pushed the L4
model furthest and was the first OS kernel to undergo a
complete formal verification of its implementation as
well as a sound analysis of worst-case execution times.
We demonstrate that while much has changed, the
fundamental principles of minimality, generality, and
high inter-process communication (IPC) performance
remain the main drivers of design and implementation
decisions.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Hauswald:2016:DFW,
author = "Johann Hauswald and Michael A. Laurenzano and Yunqi
Zhang and Hailong Yang and Yiping Kang and Cheng Li and
Austin Rovinski and Arjun Khurana and Ronald G.
Dreslinski and Trevor Mudge and Vinicius Petrucci and
Lingjia Tang and Jason Mars",
title = "Designing Future Warehouse-Scale Computers for
{Sirius}, an End-to-End Voice and Vision Personal
Assistant",
journal = j-TOCS,
volume = "34",
number = "1",
pages = "2:1--2:??",
month = apr,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2870631",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat May 21 08:09:53 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "As user demand scales for intelligent personal
assistants (IPAs) such as Apple's Siri, Google's Google
Now, and Microsoft's Cortana, we are approaching the
computational limits of current datacenter (DC)
architectures. It is an open question how future server
architectures should evolve to enable this emerging
class of applications, and the lack of an open-source
IPA workload is an obstacle in addressing this
question. In this article, we present the design of
Sirius, an open end-to-end IPA Web-service application
that accepts queries in the form of voice and images,
and responds with natural language. We then use this
workload to investigate the implications of four points
in the design space of future accelerator-based server
architectures spanning traditional CPUs, GPUs, manycore
throughput co-processors, and FPGAs. To investigate
future server designs for Sirius, we decompose Sirius
into a suite of eight benchmarks (Sirius Suite)
comprising the computationally intensive bottlenecks of
Sirius. We port Sirius Suite to a spectrum of
accelerator platforms and use the performance and power
trade-offs across these platforms to perform a total
cost of ownership (TCO) analysis of various server
design points. In our study, we find that accelerators
are critical for the future scalability of IPA
services. Our results show that GPU- and
FPGA-accelerated servers improve the query latency on
average by 8.5$ \times $ and 15$ \times $,
respectively. For a given throughput, GPU- and
FPGA-accelerated servers can reduce the TCO of DCs by
2.3$ \times $ and 1.3$ \times $, respectively.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Badamo:2016:IPE,
author = "Michael Badamo and Jeff Casarona and Minshu Zhao and
Donald Yeung",
title = "Identifying Power-Efficient Multicore Cache
Hierarchies via Reuse Distance Analysis",
journal = j-TOCS,
volume = "34",
number = "1",
pages = "3:1--3:??",
month = apr,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2851503",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat May 21 08:09:53 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "To enable performance improvements in a
power-efficient manner, computer architects have been
building CPUs that exploit greater amounts of
thread-level parallelism. A key consideration in such
CPUs is properly designing the on-chip cache hierarchy.
Unfortunately, this can be hard to do, especially for
CPUs with high core counts and large amounts of cache.
The enormous design space formed by the combinatorial
number of ways in which to organize the cache hierarchy
makes it difficult to identify power-efficient
configurations. Moreover, the problem is exacerbated by
the slow speed of architectural simulation, which is
the primary means for conducting such design space
studies. A powerful tool that can help architects
optimize CPU cache hierarchies is reuse distance (RD)
analysis. Recent work has extended uniprocessor RD
techniques---i.e., by introducing concurrent RD and
private-stack RD profiling---to enable analysis of
different types of caches in multicore CPUs. Once
acquired, parallel locality profiles can predict the
performance of numerous cache configurations,
permitting highly efficient design space exploration.
To date, existing work on multicore RD analysis has
focused on developing the profiling techniques and
assessing their accuracy. Unfortunately, there has been
no work on using RD analysis to optimize CPU
performance or power consumption. This article
investigates applying multicore RD analysis to identify
the most power efficient cache configurations for a
multicore CPU. First, we develop analytical models that
use the cache-miss counts from parallel locality
profiles to estimate CPU performance and power
consumption. Although future scalable CPUs will likely
employ multithreaded (and even out-of-order) cores, our
current study assumes single-threaded in-order cores to
simplify the models, allowing us to focus on the cache
hierarchy and our RD-based techniques. Second, to
demonstrate the utility of our techniques, we apply our
models to optimize a large-scale tiled CPU architecture
with a two-level cache hierarchy. We show that the most
power efficient configuration varies considerably
across different benchmarks, and that our locality
profiles provide deep insights into why certain
configurations are power efficient. We also show that
picking the best configuration can provide significant
gains, as there is a $ 2.01 \times $ power efficiency spread
across our tiled CPU design space. Finally, we validate
the accuracy of our techniques using detailed
simulation. Among several simulated configurations, our
techniques can usually pick the most power efficient
configuration, or one that is very close to the best.
In addition, across all simulated configurations, we
can predict power efficiency with 15.2\% error.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Perais:2016:ECS,
author = "Arthur Perais and Andr{\'e} Seznec",
title = "{EOLE}: Combining Static and Dynamic Scheduling
Through Value Prediction to Reduce Complexity and
Increase Performance",
journal = j-TOCS,
volume = "34",
number = "2",
pages = "4:1--4:??",
month = may,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2870632",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat May 21 08:09:53 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Recent work in the field of value prediction (VP) has
shown that given an efficient confidence estimation
mechanism, prediction validation could be removed from
the out-of-order engine and delayed until commit time.
As a result, a simple recovery mechanism---pipeline
squashing---can be used, whereas the out-of-order engine
remains mostly unmodified. Yet, VP and validation at
commit time require additional ports on the physical
register file, potentially rendering the overall number
of ports unbearable. Fortunately, VP also implies that
many single-cycle ALU instructions have their operands
predicted in the front-end and can be executed
in-place, in-order. Similarly, the execution of
single-cycle instructions whose result has been
predicted can be delayed until commit time since
predictions are validated at commit time. Consequently,
a significant number of instructions---10\% to 70\% in
our experiments---can bypass the out-of-order engine,
allowing for a reduction of the issue width. This
reduction paves the way for a truly practical
implementation of VP. Furthermore, since VP in itself
usually increases performance, our resulting
{Early-Out-of-Order-Late} Execution architecture, EOLE,
is often more efficient than a baseline VP-augmented
6-issue superscalar while having a significantly
narrower 4-issue out-of-order engine.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Li:2016:FSA,
author = "Sheng Li and Hyeontaek Lim and Victor W. Lee and Jung
Ho Ahn and Anuj Kalia and Michael Kaminsky and David G.
Andersen and Seongil O. and Sukhan Lee and Pradeep
Dubey",
title = "Full-Stack Architecting to Achieve a
Billion-Requests-Per-Second Throughput on a Single
Key--Value Store Server Platform",
journal = j-TOCS,
volume = "34",
number = "2",
pages = "5:1--5:??",
month = may,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2897393",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat May 21 08:09:53 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Distributed in-memory key-value stores (KVSs), such as
memcached, have become a critical data serving layer in
modern Internet-oriented data center infrastructure.
Their performance and efficiency directly affect the
QoS of web services and the efficiency of data centers.
Traditionally, these systems have had significant
overheads from inefficient network processing, OS
kernel involvement, and concurrency control. Two recent
research thrusts have focused on improving key-value
performance. Hardware-centric research has started to
explore specialized platforms including FPGAs for KVSs;
results demonstrated an order of magnitude increase in
throughput and energy efficiency over stock memcached.
Software-centric research revisited the KVS application
to address fundamental software bottlenecks and to
exploit the full potential of modern commodity
hardware; these efforts also showed orders of magnitude
improvement over stock memcached. We aim at
architecting high-performance and efficient KVS
platforms, and start with a rigorous architectural
characterization across system stacks over a collection
of representative KVS implementations. Our detailed
full-system characterization not only identifies the
critical hardware/software ingredients for
high-performance KVS systems but also leads to guided
optimizations atop a recent design to achieve a
record-setting throughput of 120 million requests per
second (MRPS) (167 MRPS with client-side batching) on a
single commodity server. Our system delivers the best
performance and energy efficiency (RPS/watt)
demonstrated to date over existing KVSs including the
best-published FPGA-based and GPU-based claims. We
craft a set of design principles for future platform
architectures, and via detailed simulations demonstrate
the capability of achieving a billion RPS with a single
server constructed following our principles.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Lo:2016:IRE,
author = "David Lo and Liqun Cheng and Rama Govindaraju and
Parthasarathy Ranganathan and Christos Kozyrakis",
title = "Improving Resource Efficiency at Scale with
{Heracles}",
journal = j-TOCS,
volume = "34",
number = "2",
pages = "6:1--6:??",
month = may,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2882783",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat May 21 08:09:53 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "User-facing, latency-sensitive services, such as
websearch, underutilize their computing resources
during daily periods of low traffic. Reusing those
resources for other tasks is rarely done in production
services since the contention for shared resources can
cause latency spikes that violate the service-level
objectives of latency-sensitive tasks. The resulting
under-utilization hurts both the affordability and
energy efficiency of large-scale datacenters. With the
slowdown in technology scaling caused by the sunsetting
of Moore's law, it becomes important to address this
opportunity. We present Heracles, a feedback-based
controller that enables the safe colocation of
best-effort tasks alongside a latency-critical service.
Heracles dynamically manages multiple hardware and
software isolation mechanisms, such as CPU, memory, and
network isolation, to ensure that the latency-sensitive
job meets latency targets while maximizing the
resources given to best-effort tasks. We evaluate
Heracles using production latency-critical and batch
workloads from Google and demonstrate average server
utilizations of 90\% without latency violations across
all the load and colocation scenarios that we
evaluated.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Jun:2016:BDF,
author = "Sang-Woo Jun and Ming Liu and Sungjin Lee and Jamey
Hicks and John Ankcorn and Myron King and Shuotao Xu
and Arvind",
title = "{BlueDBM}: Distributed Flash Storage for Big Data
Analytics",
journal = j-TOCS,
volume = "34",
number = "3",
pages = "7:1--7:??",
month = sep,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2898996",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Sep 17 16:09:15 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Complex data queries, because of their need for random
accesses, have proven to be slow unless all the data
can be accommodated in DRAM. There are many domains,
such as genomics, geological data, and daily Twitter
feeds, where the datasets of interest are 5TB to 20TB.
For such a dataset, one would need a cluster with 100
servers, each with 128GB to 256GB of DRAM, to
accommodate all the data in DRAM. On the other hand,
such datasets could be stored easily in the flash
memory of a rack-sized cluster. Flash storage has much
better random access performance than hard disks, which
makes it desirable for analytics workloads. However,
currently available off-the-shelf flash storage
packaged as SSDs does not make effective use of flash
storage because it incurs a great amount of additional
overhead during flash device management and network
access. In this article, we present BlueDBM, a new
system architecture that has flash-based storage with
in-store processing capability and a low-latency
high-throughput intercontroller network between storage
devices. We show that BlueDBM outperforms a flash-based
system without these features by a factor of 10 for
some important applications. While the performance of a
DRAM-centric system falls sharply even if only 5\% to
10\% of the references are to secondary storage, this
sharp performance degradation is not an issue in
BlueDBM. BlueDBM presents an attractive point in the
cost/performance tradeoff for Big Data analytics.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{West:2016:VSK,
author = "Richard West and Ye Li and Eric Missimer and Matthew
Danish",
title = "A Virtualized Separation Kernel for Mixed-Criticality
Systems",
journal = j-TOCS,
volume = "34",
number = "3",
pages = "8:1--8:??",
month = sep,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2935748",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Sep 17 16:09:15 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
abstract = "Multi- and many-core processors are becoming
increasingly popular in embedded systems. Many of these
processors now feature hardware virtualization
capabilities, as found on the ARM Cortex A15 and x86
architectures with Intel VT-x or AMD-V support.
Hardware virtualization provides a way to partition
physical resources, including processor cores, memory,
and I/O devices, among guest virtual machines (VMs).
Each VM is then able to host tasks of a specific
criticality level, as part of a mixed-criticality
system with different timing and safety requirements.
However, traditional virtual machine systems are
inappropriate for mixed-criticality computing. They use
hypervisors to schedule separate VMs on physical
processor cores. The costs of trapping into hypervisors
to multiplex and manage machine physical resources on
behalf of separate guests are too expensive for many
time-critical tasks. Additionally, traditional
hypervisors have memory footprints that are often too
large for many embedded computing systems. In this
article, we discuss the design of the Quest-V
separation kernel, which partitions services of
different criticality levels across separate VMs, or
sandboxes. Each sandbox encapsulates a subset of
machine physical resources that it manages without
requiring intervention from a hypervisor. In Quest-V, a
hypervisor is only needed to bootstrap the system,
recover from certain faults, and establish
communication channels between sandboxes. This not only
reduces the memory footprint of the most privileged
protection domain but also removes it from the control
path during normal system operation, thereby
heightening security.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Silberstein:2016:GNA,
author = "Mark Silberstein and Sangman Kim and Seonggu Huh and
Xinya Zhang and Yige Hu and Amir Wated and Emmett
Witchel",
title = "{GPUnet}: Networking Abstractions for {GPU} Programs",
journal = j-TOCS,
volume = "34",
number = "3",
pages = "9:1--9:??",
month = sep,
year = "2016",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2963098",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Sep 17 16:09:15 MDT 2016",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Despite the popularity of GPUs in high-performance and
scientific computing, and despite increasingly
general-purpose hardware capabilities, the use of GPUs
in network servers or distributed systems poses
significant challenges. GPUnet is a native GPU
networking layer that provides a socket abstraction and
high-level networking APIs for GPU programs. We use
GPUnet to streamline the development of
high-performance, distributed applications like
in-GPU-memory MapReduce and a new class of low-latency,
high-throughput GPU-native network services such as a
face verification server.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Zheng:2017:RAS,
author = "Mai Zheng and Joseph Tucek and Feng Qin and Mark
Lillibridge and Bill W. Zhao and Elizabeth S. Yang",
title = "Reliability Analysis of {SSDs} Under Power Fault",
journal = j-TOCS,
volume = "34",
number = "4",
pages = "10:1--10:??",
month = jan,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2992782",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jul 24 09:40:46 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Modern storage technology (solid-state disks (SSDs),
NoSQL databases, commoditized RAID hardware, etc.)
brings new reliability challenges to the
already-complicated storage stack. Among other things,
the behavior of these new components during power
faults---which happen relatively frequently in data
centers---is an important yet mostly ignored issue in
this dependability-critical area. Understanding how new
storage components behave under power fault is the
first step towards designing new robust storage
systems. In this article, we propose a new methodology
to expose reliability issues in block devices under
power faults. Our framework includes specially designed
hardware to inject power faults directly to devices,
workloads to stress storage components, and techniques
to detect various types of failures. Applying our
testing framework, we test 17 commodity SSDs from six
different vendors using more than three thousand fault
injection cycles in total. Our experimental results
reveal that 14 of the 17 tested SSD devices exhibit
surprising failure behaviors under power faults,
including bit corruption, shorn writes, unserializable
writes, metadata corruption, and total device
failure.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Belay:2017:IOS,
author = "Adam Belay and George Prekas and Mia Primorac and Ana
Klimovic and Samuel Grossman and Christos Kozyrakis and
Edouard Bugnion",
title = "The {IX} Operating System: Combining Low Latency, High
Throughput, and Efficiency in a Protected Dataplane",
journal = j-TOCS,
volume = "34",
number = "4",
pages = "11:1--11:??",
month = jan,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/2997641",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jul 24 09:40:46 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
note = "See correction \cite{Belay:2017:CIO}.",
abstract = "The conventional wisdom is that aggressive networking
requirements, such as high packet rates for small
messages and $ \mu $s-scale tail latency, are best
addressed outside the kernel, in a user-level
networking stack. We present IX, a dataplane operating
system that provides high I/O performance and high
resource efficiency while maintaining the protection
and isolation benefits of existing kernels. IX uses
hardware virtualization to separate management and
scheduling functions of the kernel (control plane) from
network processing (dataplane). The dataplane
architecture builds upon a native, zero-copy API and
optimizes for both bandwidth and latency by dedicating
hardware threads and networking queues to dataplane
instances, processing bounded batches of packets to
completion, and eliminating coherence traffic and
multicore synchronization. The control plane
dynamically adjusts core allocations and
voltage/frequency settings to meet service-level
objectives. We demonstrate that IX outperforms Linux
and a user-space network stack significantly in both
throughput and end-to-end latency. Moreover, IX
improves the throughput of a widely deployed, key-value
store by up to $ 6.4 \times $ and reduces tail latency
by more than $ 2 \times $. With three varying load
patterns, the control plane saves 46\%--54\% of
processor energy, and it allows background jobs to run
at 35\%--47\% of their standalone throughput.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Zahedi:2017:CSA,
author = "Seyed Majid Zahedi and Songchun Fan and Matthew Faw
and Elijah Cole and Benjamin C. Lee",
title = "Computational Sprinting: Architecture, Dynamics, and
Strategies",
journal = j-TOCS,
volume = "34",
number = "4",
pages = "12:1--12:??",
month = jan,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3014428",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jul 24 09:40:46 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Computational sprinting is a class of mechanisms that
boost performance but dissipate additional power. We
describe a sprinting architecture in which many,
independent chip multiprocessors share a power supply
and sprints are constrained by the chips' thermal
limits and the rack's power limits. Moreover, we
present the computational sprinting game, a multi-agent
perspective on managing sprints. Strategic agents
decide whether to sprint based on application phases
and system conditions. The game produces an equilibrium
that improves task throughput for data analytics
workloads by 4--6$ \times $ over prior greedy
heuristics and performs within 90\% of an upper bound
on throughput from a globally optimized policy.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Zhu:2017:OGP,
author = "Yuhao Zhu and Vijay Janapa Reddi",
title = "Optimizing General-Purpose {CPUs} for Energy-Efficient
Mobile {Web} Computing",
journal = j-TOCS,
volume = "35",
number = "1",
pages = "1:1--1:??",
month = jul,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3041024",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jul 24 09:40:47 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Mobile applications are increasingly being built using
web technologies as a common substrate to achieve
portability and to improve developer productivity.
Unfortunately, web applications often incur large
performance overhead, directly affecting the user
quality-of-service (QoS) experience. Traditional
techniques in improving mobile processor performance
have mostly been adopting desktop-like design
techniques such as increasing single-core
microarchitecture complexity and aggressively
integrating more cores. However, such a
desktop-oriented strategy is likely coming to an end
due to the stringent energy and thermal constraints
that mobile devices impose. Therefore, we must pivot
away from traditional mobile processor design
techniques in order to provide sustainable performance
improvement while maintaining energy efficiency. In
this article, we propose to combine hardware
customization and specialization techniques to improve
the performance and energy efficiency of mobile web
applications. We first perform design-space exploration
(DSE) and identify opportunities in customizing
existing general-purpose mobile processors, that is,
tuning microarchitecture parameters. The thorough DSE
also lets us discover sources of energy inefficiency in
customized general-purpose architectures. To mitigate
these inefficiencies, we propose, synthesize, and
evaluate two new domain-specific specializations,
called the Style Resolution Unit and the Browser Engine
Cache. Our optimizations boost performance and energy
efficiency at the same time while maintaining
general-purpose programmability. As emerging mobile
workloads increasingly rely more on web technologies,
the type of optimizations we propose will become
important in the future and are likely to have a
long-lasting and widespread impact.",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Hsu:2017:RLT,
author = "Chang-Hong Hsu and Yunqi Zhang and Michael A.
Laurenzano and David Meisner and Thomas Wenisch and
Ronald G. Dreslinski and Jason Mars and Lingjia Tang",
title = "Reining in Long Tails in Warehouse-Scale Computers
with Quick Voltage Boosting Using Adrenaline",
journal = j-TOCS,
volume = "35",
number = "1",
pages = "2:1--2:??",
month = jul,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3054742",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jul 24 09:40:47 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Reducing the long tail of the query latency
distribution in modern warehouse scale computers is
critical for improving performance and quality of
service (QoS) of workloads such as Web Search and
Memcached. Traditional turbo boost increases a
processor's voltage and frequency during a
coarse-grained sliding window, boosting all queries
that are processed during that window. However, the
inability of such a technique to pinpoint tail queries
for boosting limits its tail reduction benefit. In this
work, we propose Adrenaline, an approach to leverage
finer-granularity (tens of nanoseconds) voltage
boosting to effectively rein in the tail latency with
query-level precision. Two key insights underlie this
work. First, emerging finer granularity
voltage/frequency boosting is an enabling mechanism for
intelligent allocation of the power budget to precisely
boost only the queries that contribute to the tail
latency; second, per-query characteristics can be used
to design indicators for proactively pinpointing these
queries, triggering boosting accordingly. Based on
these insights, Adrenaline effectively pinpoints and
boosts queries that are likely to increase the tail
distribution and can reap more benefit from the
voltage/frequency boost. By evaluating under various
workload configurations, we demonstrate the
effectiveness of our methodology. We achieve up to a
2.50 $ \times $ tail latency improvement for Memcached
and up to a 3.03 $ \times $ for Web Search over
coarse-grained dynamic voltage and frequency scaling
(DVFS) given a fixed boosting power budget. When
optimizing for energy reduction, Adrenaline achieves up
to a 1.81 $ \times $ improvement for Memcached and up
to a 1.99 $ \times $ for Web Search over coarse-grained
DVFS. By using the carefully chosen boost thresholds,
Adrenaline further improves the tail latency reduction
to 4.82 $ \times $ over coarse-grained DVFS.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Chen:2017:FMT,
author = "Haibo Chen and Rong Chen and Xingda Wei and Jiaxin Shi
and Yanzhe Chen and Zhaoguo Wang and Binyu Zang and
Haibing Guan",
title = "Fast In-Memory Transaction Processing Using {RDMA} and
{HTM}",
journal = j-TOCS,
volume = "35",
number = "1",
pages = "3:1--3:??",
month = jul,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3092701",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Jul 24 09:40:47 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/hash.bib;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "DrTM is a fast in-memory transaction processing system
that exploits advanced hardware features such as remote
direct memory access (RDMA) and hardware transactional
memory (HTM). To achieve high efficiency, it mostly
offloads concurrency control such as tracking
read/write accesses and conflict detection into HTM in
a local machine and leverages the strong consistency
between RDMA and HTM to ensure serializability among
concurrent transactions across machines. To mitigate
the high probability of HTM aborts for large
transactions, we design and implement an optimized
transaction chopping algorithm to decompose a set of
large transactions into smaller pieces such that HTM is
only required to protect each piece. We further build
an efficient hash table for DrTM by leveraging HTM and
RDMA to simplify the design and notably improve the
performance. We describe how DrTM supports common
database features like read-only transactions and
logging for durability. Evaluation using typical OLTP
workloads including TPC-C and SmallBank shows that DrTM
has better single-node efficiency and scales well on a
six-node cluster; it achieves greater than 1.51, 34 and
5.24, 138 million transactions per second for TPC-C and
SmallBank on a single node and the cluster,
respectively. Such numbers outperform a
state-of-the-art single-node system (i.e., Silo) and a
distributed transaction system (i.e., Calvin) by at
least 1.9X and 29.6X for TPC-C.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Zhao:2017:UMR,
author = "Minshu Zhao and Donald Yeung",
title = "Using Multicore Reuse Distance to Study Coherence
Directories",
journal = j-TOCS,
volume = "35",
number = "2",
pages = "4:1--4:??",
month = oct,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3092702",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Oct 10 17:48:24 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Researchers have proposed numerous techniques to
improve the scalability of coherence directories. The
effectiveness of these techniques not only depends on
application behavior, but also on the CPU's
configuration, for example, its core count and cache
size. As CPUs continue to scale, it is essential to
explore the directory's application and architecture
dependencies. However, this is challenging given the
slow speed of simulators. While it is common practice
to simulate different applications, previous research
on directory designs have explored only a few---and in
most cases, only one---CPU configuration, which can lead
to an incomplete and inaccurate view of the directory's
behavior. This article proposes to use multicore reuse
distance analysis to study coherence directories. We
develop a framework to extract the directory access
stream from parallel least recently used (LRU) stacks,
enabling rapid analysis of the directory's accesses and
contents across both core count and cache size scaling.
A key part of our framework is the notion of relative
reuse distance between sharers, which defines sharing
in a capacity-dependent fashion and facilitates our
analyses along the data cache size dimension. We
implement our framework in a profiler and then apply it
to gain insights into the impact of multicore CPU
scaling on directory behavior. Our profiling results
show that directory accesses reduce by 3.3$ \times $
when scaling the data cache size from 16KB to 1MB,
despite an increase in sharing-based directory
accesses. We also show that increased sharing caused by
data cache scaling allows the portion of on-chip memory
occupied by the directory to be reduced by 43.3\%,
compared to a reduction of only 2.6\% when scaling the
number of cores. And, we show certain directory entries
exhibit high temporal reuse. In addition to gaining
insights, we also validate our profile-based results,
and find they are within 2--10\% of cache simulations
on average, across different validation experiments.
Finally, we conduct four case studies that illustrate
our insights on existing directory techniques. In
particular, we demonstrate our directory occupancy
insights on a Cuckoo directory; we apply our sharing
insights to provide bounds on the size of Scalable
Coherence Directories (SCD) and Dual-Grain Directories
(DGD); and, we demonstrate our directory entry reuse
insights on a multilevel directory design.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Chun:2017:ARR,
author = "Byung-Gon Chun and Tyson Condie and Yingda Chen and
Brian Cho and Andrew Chung and Carlo Curino and Chris
Douglas and Matteo Interlandi and Beomyeol Jeon and Joo
Seong Jeong and Gyewon Lee and Yunseong Lee and Tony
Majestro and Dahlia Malkhi and Sergiy Matusevych and
Brandon Myers and Mariia Mykhailova and Shravan
Narayanamurthy and Joseph Noor and Raghu Ramakrishnan
and Sriram Rao and Russell Sears and Beysim Sezgin and
Taegeon Um and Julia Wang and Markus Weimer and
Youngseok Yang",
title = "{Apache REEF}: Retainable Evaluator Execution
Framework",
journal = j-TOCS,
volume = "35",
number = "2",
pages = "5:1--5:??",
month = oct,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3132037",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Oct 10 17:48:24 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Resource Managers like YARN and Mesos have emerged as
a critical layer in the cloud computing system stack,
but the developer abstractions for leasing cluster
resources and instantiating application logic are very
low level. This flexibility comes at a high cost in
terms of developer effort, as each application must
repeatedly tackle the same challenges (e.g., fault
tolerance, task scheduling and coordination) and
reimplement common mechanisms (e.g., caching, bulk-data
transfers). This article presents REEF, a development
framework that provides a control plane for scheduling
and coordinating task-level (data-plane) work on
cluster resources obtained from a Resource Manager.
REEF provides mechanisms that facilitate resource reuse
for data caching and state management abstractions that
greatly ease the development of elastic data processing
pipelines on cloud platforms that support a Resource
Manager service. We illustrate the power of REEF by
showing applications built atop: a distributed shell
application, a machine-learning framework, a
distributed in-memory caching system, and a port of the
CORFU system. REEF is currently an Apache top-level
project that has attracted contributors from several
institutions and it is being used to develop several
commercial offerings such as the Azure Stream Analytics
service.",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Shen:2017:SLC,
author = "Zhiming Shen and Qin Jia and Gur-Eyal Sela and Weijia
Song and Hakim Weatherspoon and Robbert {Van Renesse}",
title = "{Supercloud}: a Library Cloud for Exploiting Cloud
Diversity",
journal = j-TOCS,
volume = "35",
number = "2",
pages = "6:1--6:??",
month = oct,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3132038",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Oct 10 17:48:24 MDT 2017",
bibsource = "http://www.acm.org/pubs/contents/journals/tocs/;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Infrastructure-as-a-Service (IaaS) cloud providers
hide available interfaces for virtual machine (VM)
placement and migration, CPU capping, memory
ballooning, page sharing, and I/O throttling, limiting
the ways in which applications can optimally configure
resources or respond to dynamically shifting workloads.
Given these interfaces, applications could migrate VMs
in response to diurnal workloads or changing prices,
adjust resources in response to load changes, and so
on. This article proposes a new abstraction that we
call a Library Cloud and that allows users to customize
the diverse available cloud resources to best serve
their applications. We built a prototype of a Library
Cloud that we call the Supercloud. The Supercloud
encapsulates applications in a virtual cloud under
users' full control and can incorporate one or more
availability zones within a cloud provider or across
different providers. The Supercloud provides virtual
machine, storage, and networking complete with a full
set of management operations, allowing applications to
optimize performance. In this article, we demonstrate
various innovations enabled by the Library Cloud.",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Diegues:2017:SPS,
author = "Nuno Diegues and Paolo Romano and Stoyan Garbatov",
title = "{Seer}: Probabilistic Scheduling for Hardware
Transactional Memory",
journal = j-TOCS,
volume = "35",
number = "3",
pages = "7:1--7:41",
month = dec,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3132036",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Dec 27 09:34:24 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "The ubiquity of multicore processors has led
programmers to write parallel and concurrent
applications to take advantage of the underlying
hardware and speed up their executions. In this
context, Transactional Memory (TM) has emerged as a
simple and effective synchronization paradigm, via the
familiar abstraction of atomic transactions. After many
years of intense research, major processor
manufacturers (including Intel) have recently released
mainstream processors with hardware support for TM
(HTM). In this work, we study a relevant issue with
great impact on the performance of HTM. Due to the
optimistic and inherently limited nature of HTM,
transactions may have to be aborted and restarted
numerous times, without any progress guarantee. As a
result, it is up to the software library that regulates
the HTM usage to ensure progress and optimize
performance. Transaction scheduling is probably one of
the most well-studied and effective techniques to
achieve these goals. However, these recent mainstream
HTMs have some technical limitations that prevent the
adoption of known scheduling techniques: unlike
software implementations of TM used in the past,
existing HTMs provide limited or no information on
which memory regions or contending transactions caused
the abort. To address this crucial issue for HTMs, we
propose Seer, a software scheduler that addresses
precisely this restriction of HTM by leveraging on an
online probabilistic inference technique that
identifies the most likely conflict relations and
establishes a dynamic locking scheme to serialize
transactions in a fine-grained manner. The key idea of
our solution is to constrain the portions of
parallelism that are affecting negatively the whole
system. As a result, this not only prevents performance
reduction but also in fact unveils further scalability
and performance for HTM. Via an extensive evaluation
study, we show that Seer improves the performance of
the Intel's HTM by up to 3.6$ \times $, and by 65\% on
average across all concurrency degrees and benchmarks
on a large processor with 28 cores.",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Nishtala:2017:HAI,
author = "Rajiv Nishtala and Paul Carpenter and Vinicius
Petrucci and Xavier Martorell",
title = "The {Hipster} Approach for Improving Cloud System
Efficiency",
journal = j-TOCS,
volume = "35",
number = "3",
pages = "8:1--8:28",
month = dec,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3144168",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Dec 27 09:34:24 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "In 2013, U.S. data centers accounted for 2.2\% of the
country's total electricity consumption, a figure that
is projected to increase rapidly over the next decade.
Many important data center workloads in cloud computing
are interactive, and they demand strict levels of
quality-of-service (QoS) to meet user expectations,
making it challenging to optimize power consumption
along with increasing performance demands. This article
introduces Hipster, a technique that combines
heuristics and reinforcement learning to improve
resource efficiency in cloud systems. Hipster explores
heterogeneous multi-cores and dynamic voltage and
frequency scaling for reducing energy consumption while
managing the QoS of the latency-critical workloads. To
improve data center utilization and make best usage of
the available resources, Hipster can dynamically assign
remaining cores to batch workloads without violating
the QoS constraints for the latency-critical workloads.
We perform experiments using a 64-bit ARM big.LITTLE
platform and show that, compared to prior work, Hipster
improves the QoS guarantee for Web-Search from 80\% to
96\%, and for Memcached from 92\% to 99\%, while
reducing the energy consumption by up to 18\%. Hipster
is also effective in learning and adapting
automatically to specific requirements of new incoming
workloads just enough to meet the QoS and optimize
resource consumption.",
acknowledgement = ack-nhfb,
articleno = "8",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Cherupalli:2017:DAS,
author = "Hari Cherupalli and Henry Duwe and Weidong Ye and
Rakesh Kumar and John Sartori",
title = "Determining Application-Specific Peak Power and Energy
Requirements for Ultra-Low-Power Processors",
journal = j-TOCS,
volume = "35",
number = "3",
pages = "9:1--9:33",
month = dec,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3148052",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Wed Dec 27 09:34:24 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
abstract = "Many emerging applications such as the Internet of
Things, wearables, implantables, and sensor networks
are constrained by power and energy. These applications
rely on ultra-low-power processors that have rapidly
become the most abundant type of processor manufactured
today. In the ultra-low-power embedded systems used by
these applications, peak power and energy requirements
are the primary factors that determine critical system
characteristics, such as size, weight, cost, and
lifetime. While the power and energy requirements of
these systems tend to be application specific,
conventional techniques for rating peak power and
energy cannot accurately bound the power and energy
requirements of an application running on a processor,
leading to overprovisioning that increases system size
and weight. In this article, we present an automated
technique that performs hardware-software coanalysis of
the application and ultra-low-power processor in an
embedded system to determine application-specific peak
power and energy requirements. Our technique provides
more accurate, tighter bounds than conventional
techniques for determining peak power and energy
requirements. Also, unlike conventional approaches, our
technique reports guaranteed bounds on peak power and
energy independent of an application's input set.
Tighter bounds on peak power and energy can be
exploited to reduce system size, weight, and cost.",
acknowledgement = ack-nhfb,
articleno = "9",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Belay:2017:CIO,
author = "Adam Belay and George Prekas and Mia Primorac and Ana
Klimovic and Samuel Grossman and Christos Kozyrakis and
Edouard Bugnion",
title = "Corrigendum to {``The IX Operating System: Combining
Low Latency, High Throughput and Efficiency in a
Protected Dataplane''}",
journal = j-TOCS,
volume = "35",
number = "3",
pages = "10:1--10:1",
month = dec,
year = "2017",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3154292",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Fri Dec 29 17:57:41 MST 2017",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
note = "See \cite{Belay:2017:IOS}.",
acknowledgement = ack-nhfb,
articleno = "10",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Mace:2018:PTD,
author = "Jonathan Mace and Ryan Roelke and Rodrigo Fonseca",
title = "Pivot Tracing: Dynamic Causal Monitoring for
Distributed Systems",
journal = j-TOCS,
volume = "35",
number = "4",
pages = "11:1--11:??",
month = dec,
year = "2018",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3208104",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Sep 21 11:44:29 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/java2010.bib;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3208104",
abstract = "Monitoring and troubleshooting distributed systems is
notoriously difficult; potential problems are complex,
varied, and unpredictable. The monitoring and diagnosis
tools commonly used today---logs, counters, and
metrics---have two important limitations: what gets
recorded is defined a priori, and the information is
recorded in a component- or machine-centric way, making
it extremely hard to correlate events that cross these
boundaries. This article presents Pivot Tracing, a
monitoring framework for distributed systems that
addresses both limitations by combining dynamic
instrumentation with a novel relational operator: the
happened-before join. Pivot Tracing gives users, at
runtime, the ability to define arbitrary metrics at one
point of the system, while being able to select,
filter, and group by events meaningful at other parts
of the system, even when crossing component or machine
boundaries. We have implemented a prototype of Pivot
Tracing for Java-based systems and evaluate it on a
heterogeneous Hadoop cluster comprising HDFS, HBase,
MapReduce, and YARN. We show that Pivot Tracing can
effectively identify a diverse range of root causes
such as software bugs, misconfiguration, and limping
hardware. We show that Pivot Tracing is dynamic,
extensible, and enables cross-tier analysis between
inter-operating applications, with low execution
overhead.",
acknowledgement = ack-nhfb,
articleno = "11",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Zhang:2018:BCT,
author = "Irene Zhang and Naveen Kr. Sharma and Adriana Szekeres
and Arvind Krishnamurthy and Dan R. K. Ports",
title = "Building Consistent Transactions with Inconsistent
Replication",
journal = j-TOCS,
volume = "35",
number = "4",
pages = "12:1--12:??",
month = dec,
year = "2018",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3269981",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Sep 21 11:44:29 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3269981",
abstract = "Application programmers increasingly prefer
distributed storage systems with strong consistency and
distributed transactions (e.g., Google's Spanner) for
their strong guarantees and ease of use. Unfortunately,
existing transactional storage systems are expensive to
use---in part, because they require costly replication
protocols, like Paxos, for fault tolerance. In this
article, we present a new approach that makes
transactional storage systems more affordable: We
eliminate consistency from the replication protocol,
while still providing distributed transactions with
strong consistency to applications. We present the
Transactional Application Protocol for Inconsistent
Replication (TAPIR), the first transaction protocol to
use a novel replication protocol, called inconsistent
replication, that provides fault tolerance without
consistency. By enforcing strong consistency only in
the transaction protocol, TAPIR can commit transactions
in a single round-trip and order distributed
transactions without centralized coordination. We
demonstrate the use of TAPIR in a transactional
key-value store, TAPIR-KV. Compared to conventional
systems, TAPIR-KV provides better latency and better
throughput.",
acknowledgement = ack-nhfb,
articleno = "12",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Hunt:2018:RDS,
author = "Tyler Hunt and Zhiting Zhu and Yuanzhong Xu and Simon
Peter and Emmett Witchel",
title = "{Ryoan}: a Distributed Sandbox for Untrusted
Computation on Secret Data",
journal = j-TOCS,
volume = "35",
number = "4",
pages = "13:1--13:??",
month = dec,
year = "2018",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3231594",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Sep 21 11:44:29 MDT 2019",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/ft_gateway.cfm?id=3231594",
abstract = "Users of modern data-processing services such as tax
preparation or genomic screening are forced to trust
them with data that the users wish to keep secret.
Ryoan protects secret data while it is processed by
services that the data owner does not trust.
Accomplishing this goal in a distributed setting is
difficult, because the user has no control over the
service providers or the computational platform.
Confining code to prevent it from leaking secrets is
notoriously difficult, but Ryoan benefits from new
hardware and a request-oriented data model. Ryoan
provides a distributed sandbox, leveraging hardware
enclaves (e.g., Intel's software guard extensions (SGX))
to protect sandbox instances from potentially
malicious computing platforms. The protected sandbox
instances confine untrusted data-processing modules to
prevent leakage of the user's input data. Ryoan is
designed for a request-oriented data model, where
confined modules only process input once and do not
persist state about the input. We present the design
and prototype implementation of Ryoan and evaluate it
on a series of challenging problems including email
filtering, health analysis, image processing and
machine translation.",
acknowledgement = ack-nhfb,
articleno = "13",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Guerraoui:2019:LUA,
  author =       "Rachid Guerraoui and Hugo Guiroux and Renaud Lachaize
                 and Vivien Qu{\'e}ma and Vasileios Trigonakis",
  title =        "Lock--Unlock: Is That All? {A} Pragmatic Analysis of
                 Locking in Software Systems",
  journal =      j-TOCS,
  volume =       "36",
  number =       "1",
  pages =        "1:1--1:??",
  month =        mar,
  year =         "2019",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3301501",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Sep 21 11:44:29 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3301501",
  abstract =     "A plethora of optimized mutex lock algorithms have
                 been designed over the past 25 years to mitigate
                 performance bottlenecks related to critical sections
                 and locks. Unfortunately, there is currently no broad
                 study of the behavior of these optimized lock
                 algorithms on realistic applications that consider
                 different performance metrics, such as energy
                 efficiency and tail latency. In this article, we
                 perform a thorough and practical analysis of
                 synchronization, with the goal of providing software
                 developers with enough information to design fast,
                 scalable, and energy-efficient synchronization in their
                 systems. First, we perform a performance study of 28
                 state-of-the-art mutex lock algorithms, on 40
                 applications, on four different multicore machines. We
                 consider not only throughput (traditionally the main
                 performance metric) but also energy efficiency and tail
                 latency, which are becoming increasingly important.
                 Second, we present an in-depth analysis in which we
                 summarize our findings for all the studied
                 applications. In particular, we describe nine different
                 lock-related performance bottlenecks, and we propose
                 six guidelines helping software developers with their
                 choice of a lock algorithm according to the different
                 lock properties and the application characteristics.
                 From our detailed analysis, we make several
                 observations regarding locking algorithms and
                 application behaviors, several of which have not been
                 previously discovered: (i) applications stress not only
                 the lock-unlock interface but also the full locking API
                 (e.g., trylocks, condition variables); (ii) the memory
                 footprint of a lock can directly affect the application
                 performance; (iii) for many applications, the
                 interaction between locks and scheduling is an
                 important application performance factor; (iv) lock
                 tail latencies may or may not affect application tail
                 latency; (v) no single lock is systematically the best;
                 (vi) choosing the best lock is difficult; and (vii)
                 energy efficiency and throughput go hand in hand in the
                 context of lock algorithms. These findings highlight
                 that locking involves more considerations than the
                 simple lock/unlock interface and call for further
                 research on designing low-memory footprint adaptive
                 locks that fully and efficiently support the full lock
                 interface, and consider all performance metrics.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Zhao:2019:VER,
  author =       "Boyan Zhao and Rui Hou and Jianbo Dong and Michael
                 Huang and Sally A. McKee and Qianlong Zhang and Yueji
                 Liu and Ye Li and Lixin Zhang and Dan Meng",
  title =        "{Venice}: an Effective Resource Sharing Architecture
                 for Data Center Servers",
  journal =      j-TOCS,
  volume =       "36",
  number =       "1",
  pages =        "2:1--2:??",
  month =        mar,
  year =         "2019",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3310360",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Sep 21 11:44:29 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3310360",
  abstract =     "Consolidated server racks are quickly becoming the
                 standard infrastructure for engineering, business,
                 medicine, and science. Such servers are still designed
                 much in the way when they were organized as individual,
                 distributed systems. Given that many fields rely on
                 big-data analytics substantially, its
                 cost-effectiveness and performance should be improved,
                 which can be achieved by flexibly allowing resources to
                 be shared across nodes. Here we describe Venice, a
                 family of data-center server architectures that
                 includes a strong communication substrate as a
                 first-class resource. Venice supports a diverse set of
                 resource-joining mechanisms that enables applications
                 to leverage non-local resources efficiently. We have
                 constructed a hardware prototype to better understand
                 the implications of design decisions about system
                 support for resource sharing. We use it to measure the
                 performance of at-scale applications and to explore
                 performance, power, and resource-sharing transparency
                 tradeoffs (i.e., how many programming changes are
                 needed). We analyze these tradeoffs for sharing memory,
                 accelerators, and NICs. We find that reducing/hiding
                 latency is particularly important, the chosen
                 communication channels should match the sharing access
                 patterns of the applications, and of which we can
                 improve performance by exploiting inter-channel
                 collaboration.",
  acknowledgement = ack-nhfb,
  articleno =    "2",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Shi:2019:DGC,
  author =       "Xuanhua Shi and Zhixiang Ke and Yongluan Zhou and Hai
                 Jin and Lu Lu and Xiong Zhang and Ligang He and Zhenyu
                 Hu and Fei Wang",
  title =        "{Deca}: a Garbage Collection Optimizer for In-Memory
                 Data Processing",
  journal =      j-TOCS,
  volume =       "36",
  number =       "1",
  pages =        "3:1--3:??",
  month =        mar,
  year =         "2019",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3310361",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Sep 21 11:44:29 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3310361",
  abstract =     "In-memory caching of intermediate data and active
                 combining of data in shuffle buffers have been shown to
                 be very effective in minimizing the recomputation and
                 I/O cost in big data processing systems such as Spark
                 and Flink. However, it has also been widely reported
                 that these techniques would create a large amount of
                 long-living data objects in the heap. These generated
                 objects may quickly saturate the garbage collector,
                 especially when handling a large dataset, and hence,
                 limit the scalability of the system. To eliminate this
                 problem, we propose a lifetime-based memory management
                 framework, which, by automatically analyzing the
                 user-defined functions and data types, obtains the
                 expected lifetime of the data objects and then
                 allocates and releases memory space accordingly to
                 minimize the garbage collection overhead. In
                 particular, we present Deca, a concrete
                 implementation of our proposal on top of Spark, which
                 transparently decomposes and groups objects with
                 similar lifetimes into byte arrays and releases their
                 space altogether when their lifetimes come to an end.
                 When systems are processing very large data, Deca also
                 provides field-oriented memory pages to ensure high
                 compression efficiency. Extensive experimental studies
                 using both synthetic and real datasets show that, in
                 comparing to Spark, Deca is able to (1) reduce the
                 garbage collection time by up to 99.9\%, (2) reduce the
                 memory consumption by up to 46.6\% and the storage
                 space by 23.4\%, (3) achieve 1.2$ \times $ to 22.7$
                 \times $ speedup in terms of execution time in cases
                 without data spilling and 16$ \times $ to 41.6$ \times
                 $ speedup in cases with data spilling, and (4) provide
                 similar performance compared to domain-specific
                 systems.",
  acknowledgement = ack-nhfb,
  articleno =    "3",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Jha:2019:DFS,
  author =       "Sagar Jha and Jonathan Behrens and Theo Gkountouvas
                 and Matthew Milano and Weijia Song and Edward Tremel
                 and Robbert {Van Renesse} and Sydney Zink and Kenneth
                 P. Birman",
  title =        "{Derecho}: Fast State Machine Replication for Cloud
                 Services",
  journal =      j-TOCS,
  volume =       "36",
  number =       "2",
  pages =        "4:1--4:??",
  month =        apr,
  year =         "2019",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3302258",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Sep 21 11:44:30 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "See corrigendum \cite{Jha:2020:CDF}.",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3302258",
  abstract =     "Cloud computing services often replicate data and may
                 require ways to coordinate distributed actions. Here we
                 present Derecho, a library for such tasks. The API
                 provides interfaces for structuring applications into
                 patterns of subgroups and shards, supports state
                 machine replication within them, and includes
                 mechanisms that assist in restart after failures.
                 Running over 100Gbps RDMA, Derecho can send millions of
                 events per second in each subgroup or shard and
                 throughput peaks at 16GB/s, substantially outperforming
                 prior solutions. Configured to run purely on TCP,
                 Derecho is still substantially faster than comparable
                 widely used, highly-tuned, standard tools. The key
                 insight is that on modern hardware (including non-RDMA
                 networks), data-intensive protocols should be built
                 from non-blocking data-flow components.",
  acknowledgement = ack-nhfb,
  articleno =    "4",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Bergman:2019:SSO,
  author =       "Shai Bergman and Tanya Brokhman and Tzachi Cohen and
                 Mark Silberstein",
  title =        "{SPIN}: Seamless Operating System Integration of
                 Peer-to-Peer {DMA} Between {SSDs} and {GPUs}",
  journal =      j-TOCS,
  volume =       "36",
  number =       "2",
  pages =        "5:1--5:??",
  month =        apr,
  year =         "2019",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3309987",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Sep 21 11:44:30 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3309987",
  abstract =     "Recent GPUs enable Peer-to-Peer Direct Memory Access
                 (p2p) from fast peripheral devices like NVMe SSDs to
                 exclude the CPU from the data path between them for
                 efficiency. Unfortunately, using p2p to access files is
                 challenging because of the subtleties of low-level
                 non-standard interfaces, which bypass the OS file I/O
                 layers and may hurt system performance. Developers must
                 possess intimate knowledge of low-level interfaces to
                 manually handle the subtleties of data consistency and
                 misaligned accesses. We present SPIN, which integrates
                 p2p into the standard OS file I/O stack, dynamically
                 activating p2p where appropriate, transparently to the
                 user. It combines p2p with page cache accesses,
                 re-enables read-ahead for sequential reads, all while
                 maintaining standard POSIX FS consistency, portability
                 across GPUs and SSDs, and compatibility with virtual
                 block devices such as software RAID. We evaluate SPIN
                 on NVIDIA and AMD GPUs using standard file I/O
                 benchmarks, application traces, and end-to-end
                 experiments. SPIN achieves significant performance
                 speedups across a wide range of workloads, exceeding
                 p2p throughput by up to an order of magnitude. It also
                 boosts the performance of an aerial imagery rendering
                 application by 2.6$ \times $ by dynamically adapting to
                 its input-dependent file access pattern, enables 3.3$
                 \times $ higher throughput for a GPU-accelerated log
                 server, and enables 29\% faster execution for the
                 highly optimized GPU-accelerated image collage with
                 only 30 changed lines of code.",
  acknowledgement = ack-nhfb,
  articleno =    "5",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Novakovic:2019:MLI,
  author =       "Stanko Novakovic and Alexandros Daglis and Dmitrii
                 Ustiugov and Edouard Bugnion and Babak Falsafi and
                 Boris Grot",
  title =        "Mitigating Load Imbalance in Distributed Data Serving
                 with Rack-Scale Memory Pooling",
  journal =      j-TOCS,
  volume =       "36",
  number =       "2",
  pages =        "6:1--6:??",
  month =        apr,
  year =         "2019",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3309986",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Sep 21 11:44:30 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3309986",
  abstract =     "To provide low-latency and high-throughput guarantees,
                 most large key-value stores keep the data in the memory
                 of many servers. Despite the natural parallelism across
                 lookups, the load imbalance, introduced by heavy skew
                 in the popularity distribution of keys, limits
                 performance. To avoid violating tail latency
                 service-level objectives, systems tend to keep server
                 utilization low and organize the data in micro-shards,
                 which provides units of migration and replication for
                 the purpose of load balancing. These techniques reduce
                 the skew but incur additional monitoring, data
                 replication, and consistency maintenance overheads. In
                 this work, we introduce RackOut, a memory pooling
                 technique that leverages the one-sided remote read
                 primitive of emerging rack-scale systems to mitigate
                 load imbalance while respecting service-level
                 objectives. In RackOut, the data are aggregated at
                 rack-scale granularity, with all of the participating
                 servers in the rack jointly servicing all of the rack's
                 micro-shards. We develop a queuing model to evaluate
                 the impact of RackOut at the datacenter scale. In
                 addition, we implement a RackOut proof-of-concept
                 key-value store, evaluate it on two experimental
                 platforms based on RDMA and Scale-Out NUMA, and use
                 these results to validate the model. We devise two
                 distinct approaches to load balancing within a RackOut
                 unit, one based on random selection of nodes ---
                 RackOut\_static --- and another one based on an
                 adaptive load balancing mechanism ---
                 RackOut\_adaptive. Our results show that
                 RackOut\_static increases throughput by up to 6$
                 \times $ for RDMA and 8.6$ \times $ for Scale-Out NUMA
                 compared to a scale-out deployment, while respecting
                 tight tail latency service-level objectives.
                 RackOut\_adaptive improves the throughput by 30\% for
                 workloads with 20\% of writes over RackOut\_static.",
  acknowledgement = ack-nhfb,
  articleno =    "6",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Iturbe:2019:ATC,
  author =       "Xabier Iturbe and Balaji Venu and Emre Ozer and
                 Jean-Luc Poupat and Gregoire Gimenez and Hans-Ulrich
                 Zurek",
  title =        "The {Arm Triple Core Lock-Step (TCLS)} Processor",
  journal =      j-TOCS,
  volume =       "36",
  number =       "3",
  pages =        "7:1--7:??",
  month =        aug,
  year =         "2019",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3323917",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Sep 21 11:44:30 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3323917",
  abstract =     "The Arm Triple Core Lock-Step (TCLS) architecture is
                 the natural evolution of Arm Cortex-R Dual Core
                 Lock-Step (DCLS) processors to increase dependability,
                 predictability, and availability in safety-critical and
                 ultra-reliable applications. TCLS is simple, scalable,
                 and easy to deploy in applications where Arm DCLS
                 processors are widely used (e.g., automotive), as well
                 as in new sectors where the presence of Arm technology
                 is incipient (e.g., enterprise) or almost non-existent
                 (e.g., space). Specifically in space, COTS Arm
                 processors provide optimal power-to-performance,
                 extensibility, evolvability, software availability, and
                 ease of use, especially in comparison with the decades
                 old rad-hard computing solutions that are still in use.
                 This article discusses the fundamentals of an Arm
                 Cortex-R5 based TCLS processor, providing key
                 functioning and implementation details. The article
                 shows that the TCLS architecture keeps the use of
                 rad-hard technology to a minimum, namely, using
                 rad-hard by design standard cell libraries only to
                 protect the critical parts that account for less than
                 4\% of the entire TCLS solution. Moreover, when
                 exposure to radiation is relatively low, such as in
                 terrestrial applications or even satellites operating
                 in Low Earth Orbits (LEO), the system could be
                 implemented entirely using commercial cell libraries,
                 relying on the radiation mitigation methods implemented
                 on the TCLS to cope with sporadic soft errors in its
                 critical parts. The TCLS solution allows thus to
                 significantly reduce chip manufacturing costs and keep
                 pace with advances in low power consumption and high
                 density integration by leveraging commercial
                 semiconductor processes, while matching the reliability
                 levels and improving availability that can be achieved
                 using extremely expensive rad-hard semiconductor
                 processes. Finally, the article describes a TRL4
                 proof-of-concept TCLS-based System-on-Chip (SoC) that
                 has been prototyped and tested to power the computer
                 on-board an Airbus Defence and Space telecom satellite.
                 When compared to the currently used processor solution
                 by Airbus, the TCLS-based SoC results in a more than 5$
                 \times $ performance increase and cuts power
                 consumption by more than half.",
  acknowledgement = ack-nhfb,
  articleno =    "7",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Ainsworth:2019:SPI,
  author =       "Sam Ainsworth and Timothy M. Jones",
  title =        "Software Prefetching for Indirect Memory Accesses: a
                 Microarchitectural Perspective",
  journal =      j-TOCS,
  volume =       "36",
  number =       "3",
  pages =        "8:1--8:??",
  month =        aug,
  year =         "2019",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3319393",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Sep 21 11:44:30 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3319393",
  abstract =     "Many modern data processing and HPC workloads are
                 heavily memory-latency bound. A tempting proposition to
                 solve this is software prefetching, where special
                 non-blocking loads are used to bring data into the
                 cache hierarchy just before being required. However,
                 these are difficult to insert to effectively improve
                 performance, and techniques for automatic insertion are
                 currently limited. This article develops a novel
                 compiler pass to automatically generate software
                 prefetches for indirect memory accesses, a special
                 class of irregular memory accesses often seen in
                 high-performance workloads. We evaluate this across a
                 wide set of systems, all of which gain benefit from the
                 technique. We then evaluate the extent to which good
                 prefetch instructions are architecture dependent and
                 the class of programs that are particularly amenable.
                 Across a set of memory-bound benchmarks, our automated
                 pass achieves average speedups of 1.3$ \times $ for an
                 Intel Haswell processor, 1.1$ \times $ for both an ARM
                 Cortex-A57 and Qualcomm Kryo, 1.2$ \times $ for a
                 Cortex-A72 and an Intel Kaby Lake, and 1.35$ \times $
                 for an Intel Xeon Phi Knight's Landing, each of which
                 is an out-of-order core, and performance improvements
                 of 2.1$ \times $ and 2.7$ \times $ for the in-order ARM
                 Cortex-A53 and first generation Intel Xeon Phi.",
  acknowledgement = ack-nhfb,
  articleno =    "8",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Chen:2019:ISA,
  author =       "Yunji Chen and Huiying Lan and Zidong Du and Shaoli
                 Liu and Jinhua Tao and Dong Han and Tao Luo and Qi Guo
                 and Ling Li and Yuan Xie and Tianshi Chen",
  title =        "An Instruction Set Architecture for Machine Learning",
  journal =      j-TOCS,
  volume =       "36",
  number =       "3",
  pages =        "9:1--9:??",
  month =        aug,
  year =         "2019",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3331469",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Sat Sep 21 11:44:30 MDT 2019",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/ft_gateway.cfm?id=3331469",
  abstract =     "Machine Learning (ML) are a family of models for
                 learning from the data to improve performance on a
                 certain task. ML techniques, especially recent renewed
                 neural networks (deep neural networks), have proven to
                 be efficient for a broad range of applications. ML
                 techniques are conventionally executed on
                 general-purpose processors (such as CPU and GPGPU),
                 which usually are not energy efficient, since they
                 invest excessive hardware resources to flexibly support
                 various workloads. Consequently, application-specific
                 hardware accelerators have been proposed recently to
                 improve energy efficiency. However, such accelerators
                 were designed for a small set of ML techniques sharing
                 similar computational patterns, and they adopt complex
                 and informative instructions (control signals) directly
                 corresponding to high-level functional blocks of an ML
                 technique (such as layers in neural networks) or even
                 an ML as a whole. Although straightforward and easy to
                 implement for a limited set of similar ML techniques,
                 the lack of agility in the instruction set prevents
                 such accelerator designs from supporting a variety of
                 different ML techniques with sufficient flexibility and
                 efficiency. In this article, we first propose a novel
                 domain-specific Instruction Set Architecture (ISA) for
                 NN accelerators, called Cambricon, which is a
                 load-store architecture that integrates scalar, vector,
                 matrix, logical, data transfer, and control
                 instructions, based on a comprehensive analysis of
                 existing NN techniques. We then extend the application
                 scope of Cambricon from NN to ML techniques. We also
                 propose an assembly language, an assembler, and runtime
                 to support programming with Cambricon, especially
                 targeting large-scale ML problems. Our evaluation over
                 a total of 16 representative yet distinct ML techniques
                 have demonstrated that Cambricon exhibits strong
                 descriptive capacity over a broad range of ML
                 techniques and provides higher code density than
                 general-purpose ISAs such as x86, MIPS, and GPGPU.
                 Compared to the latest state-of-the-art NN accelerator
                 design DaDianNao (which can only accommodate three
                 types of NN techniques), our Cambricon-based
                 accelerator prototype implemented in TSMC 65nm
                 technology incurs only negligible latency/power/area
                 overheads, with a versatile coverage of 10 different NN
                 benchmarks and 7 other ML benchmarks. Compared to the
                 recent prevalent ML accelerator PuDianNao, our
                 Cambricon-based accelerator is able to support all the
                 ML techniques as well as the 10 NNs but with only
                 approximate 5.1\% performance loss.",
  acknowledgement = ack-nhfb,
  articleno =    "9",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "http://portal.acm.org/browse_dl.cfm?idx=J774",
}
@Article{Bai:2020:EDS,
  author =       "Jia-Ju Bai and Julia Lawall and Shi-Min Hu",
  title =        "Effective Detection of Sleep-in-atomic-context Bugs in
                 the {Linux} Kernel",
  journal =      j-TOCS,
  volume =       "36",
  number =       "4",
  pages =        "10:1--10:30",
  month =        jun,
  year =         "2020",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3381990",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Jun 12 07:20:51 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/linux.bib;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib;
                 https://www.math.utah.edu/pub/tex/bib/unix.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3381990",
  abstract =     "Atomic context is an execution state of the Linux
                 kernel in which kernel code monopolizes a CPU core. In
                 this state, the Linux kernel may only perform
                 operations that cannot sleep, as otherwise a system
                 hang or crash may occur. We refer to this kind of
                 concurrency bug as a sleep-in-atomic-context (SAC) bug.
                 In practice, SAC bugs are hard to find, as they do not
                 cause problems in all executions.\par
                 In this article, we propose a practical static approach
                 named DSAC to effectively detect SAC bugs in the Linux
                 kernel. DSAC uses three key techniques: (1) a
                 summary-based analysis to identify the code that may be
                 executed in atomic context, (2) a connection-based
                 alias analysis to identify the set of functions
                 referenced by a function pointer, and (3) a path-check
                 method to filter out repeated reports and false bugs.
                 We evaluate DSAC on Linux 4.17 and find 1,159 SAC bugs.
                 We manually check all the bugs and find that 1,068 bugs
                 are real. We have randomly selected 300 of the real
                 bugs and sent them to kernel developers. 220 of these
                 bugs have been confirmed, and 51 of our patches fixing
                 115 bugs have been applied.",
  acknowledgement = ack-nhfb,
  articleno =    "10",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "https://dl.acm.org/loi/tocs",
}
@Article{Malkhi:2020:ISI,
  author =       "Dahlia Malkhi and Dan Tsafrir",
  title =        "Introduction to the Special Issue on the Award Papers
                 of {USENIX ATC 2019}",
  journal =      j-TOCS,
  volume =       "36",
  number =       "4",
  pages =        "11:1--11:2",
  month =        jun,
  year =         "2020",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3395034",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Jun 12 07:20:51 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3395034",
  acknowledgement = ack-nhfb,
  articleno =    "11",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "https://dl.acm.org/loi/tocs",
}
@Article{Balmau:2020:SPL,
  author =       "Oana Balmau and Florin Dinu and Willy Zwaenepoel and
                 Karan Gupta and Ravishankar Chandhiramoorthi and Diego
                 Didona",
  title =        "{SILK+}: Preventing Latency Spikes in Log-Structured
                 Merge Key--Value Stores Running Heterogeneous
                 Workloads",
  journal =      j-TOCS,
  volume =       "36",
  number =       "4",
  pages =        "12:1--12:27",
  month =        jun,
  year =         "2020",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3380905",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Jun 12 07:20:51 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3380905",
  abstract =     "Log-Structured Merge Key-Value stores (LSM KVs) are
                 designed to offer good write performance, by capturing
                 client writes in memory, and only later flushing them
                 to storage. Writes are later compacted into a tree-like
                 data structure on disk to improve \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "12",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "https://dl.acm.org/loi/tocs",
}
@Article{Leesatapornwongsa:2020:TWT,
  author =       "Tanakorn Leesatapornwongsa and Aritra Sengupta and
                 Masoud Saeida Ardekani and Gustavo Petri and Cesar
                 A. Stuardo",
  title =        "Transactuations: Where Transactions Meet the Physical
                 World",
  journal =      j-TOCS,
  volume =       "36",
  number =       "4",
  pages =        "13:1--13:31",
  month =        jun,
  year =         "2020",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3380907",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Jun 12 07:20:51 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3380907",
  abstract =     "A large class of IoT applications read sensors,
                 execute application logic, and actuate actuators.
                 However, the lack of high-level programming
                 abstractions compromises correctness, especially in the
                 presence of failures and unwanted interleaving between
                 \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "13",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "https://dl.acm.org/loi/tocs",
}
@Article{Spink:2020:RSL,
  author =       "Tom Spink and Harry Wagstaff and Bj{\"o}rn Franke",
  title =        "A Retargetable System-level {DBT} Hypervisor",
  journal =      j-TOCS,
  volume =       "36",
  number =       "4",
  pages =        "14:1--14:24",
  month =        jun,
  year =         "2020",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3386161",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Jun 12 07:20:51 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3386161",
  abstract =     "System-level Dynamic Binary Translation (DBT) provides
                 the capability to boot an Operating System (OS) and
                 execute programs compiled for an Instruction Set
                 Architecture (ISA) different from that of the host
                 machine. Due to their performance-critical \ldots{}",
  acknowledgement = ack-nhfb,
  articleno =    "14",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "https://dl.acm.org/loi/tocs",
}
@Article{Jha:2020:CDF,
  author =       "Sagar Jha",
  title =        "Corrigendum to {``Derecho: Fast State Machine
                 Replication for Cloud Services,'' by Jha et al., ACM
                 Transactions on Computer Systems (TOCS) Volume
                 \textbf{36}, Issue 2, Article No. 4}",
  journal =      j-TOCS,
  volume =       "36",
  number =       "4",
  pages =        "15:1--15:1",
  month =        jun,
  year =         "2020",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3395604",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Fri Jun 12 07:20:51 MDT 2020",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  note =         "See \cite{Jha:2019:DFS}.",
  URL =          "https://dl.acm.org/doi/abs/10.1145/3395604",
  acknowledgement = ack-nhfb,
  articleno =    "15",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "https://dl.acm.org/loi/tocs",
}
@Article{Sadrosadati:2021:HCL,
  author =       "Mohammad Sadrosadati and Amirhossein Mirhosseini and
                 Ali Hajiabadi and Seyed Borna Ehsani and Hajar Falahati
                 and Hamid Sarbazi-Azad and Mario Drumond and Babak
                 Falsafi and Rachata Ausavarungnirun and Onur Mutlu",
  title =        "Highly Concurrent Latency-tolerant Register Files for
                 {GPUs}",
  journal =      j-TOCS,
  volume =       "37",
  number =       "1--4",
  pages =        "1:1--1:36",
  month =        mar,
  year =         "2021",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3419973",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Tue May 25 09:04:45 MDT 2021",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3419973",
  abstract =     "Graphics Processing Units (GPUs) employ large register
                 files to accommodate all active threads and accelerate
                 context switching. Unfortunately, register files are a
                 scalability bottleneck for future GPUs due to long
                 access latency, high power consumption, and large
                 silicon area provisioning. Prior work proposes
                 hierarchical register file to reduce the register file
                 power consumption by caching registers in a smaller
                 register file cache. Unfortunately, this approach does
                 not improve register access latency due to the low hit
                 rate in the register file cache.\par
                 In this article, we propose the Latency-Tolerant
                 Register File (LTRF) architecture to achieve low
                 latency in a two-level hierarchical structure while
                 keeping power consumption low. We observe that
                 compile-time interval analysis enables us to divide GPU
                 program execution into intervals with an accurate
                 estimate of a warp's aggregate register working-set
                 within each interval. The key idea of LTRF is to
                 prefetch the estimated register working-set from the
                 main register file to the register file cache under
                 software control, at the beginning of each interval,
                 and overlap the prefetch latency with the execution of
                 other warps. We observe that register bank conflicts
                 while prefetching the registers could greatly reduce
                 the effectiveness of LTRF. Therefore, we devise a
                 compile-time register renumbering technique to reduce
                 the likelihood of register bank conflicts. Our
                 experimental results show that LTRF enables
                 high-capacity yet long-latency main GPU register files,
                 paving the way for various optimizations. As an example
                 optimization, we implement the main register file with
                 emerging high-density high-latency memory technologies,
                 enabling $ 8 \times $ larger capacity and improving
                 overall GPU performance by 34\%.",
  acknowledgement = ack-nhfb,
  articleno =    "1",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "https://dl.acm.org/loi/tocs",
}
@Article{Zhang:2021:KSV,
author = "Yiming Zhang and Chengfei Zhang and Yaozheng Wang and
Kai Yu and Guangtao Xue and Jon Crowcroft",
title = "{KylinX}: Simplified Virtualization Architecture for
Specialized Virtual Appliances with Strong Isolation",
journal = j-TOCS,
volume = "37",
number = "1--4",
pages = "2:1--2:27",
month = mar,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3436512",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue May 25 09:04:45 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib;
https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
URL = "https://dl.acm.org/doi/10.1145/3436512",
abstract = "Unikernel specializes a minimalistic LibOS and a
target application into a standalone single-purpose
virtual machine (VM) running on a hypervisor, which is
referred to as (virtual) appliance. Compared to
traditional VMs, Unikernel appliances have smaller
memory footprint and lower overhead while guaranteeing
the same level of isolation. On the downside, Unikernel
strips off the process abstraction from its monolithic
appliance and thus sacrifices flexibility, efficiency,
and applicability.\par
In this article, we examine whether there is a balance
embracing the best of both Unikernel appliances (strong
isolation) and processes (high flexibility/efficiency).
We present KylinX, a dynamic library operating system
for simplified and efficient cloud virtualization by
providing the pVM (process-like VM) abstraction. A pVM
takes the hypervisor as an OS and the Unikernel
appliance as a process allowing both page-level and
library-level dynamic mapping. At the page level,
KylinX supports pVM fork plus a set of API for
inter-pVM communication (IpC, which is compatible with
conventional UNIX IPC). At the library level, KylinX
supports shared libraries to be linked to a Unikernel
appliance at runtime. KylinX enforces mapping
restrictions against potential threats. We implement a
prototype of KylinX by modifying MiniOS and Xen tools.
Extensive experimental results show that KylinX
achieves similar performance both in micro benchmarks
(fork, IpC, library update, etc.) and in applications
(Redis, web server, and DNS server) compared to
conventional processes, while retaining the strong
isolation benefit of VMs/Unikernels.",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Richins:2021:ATH,
author = "Daniel Richins and Dharmisha Doshi and Matthew
Blackmore and Aswathy Thulaseedharan Nair and Neha
Pathapati and Ankit Patel and Brainard Daguman and
Daniel Dobrijalowski and Ramesh Illikkal and Kevin Long
and David Zimmerman and Vijay Janapa Reddi",
title = "{AI} Tax: The Hidden Cost of {AI} Data Center
Applications",
journal = j-TOCS,
volume = "37",
number = "1--4",
pages = "3:1--3:32",
month = mar,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3440689",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue May 25 09:04:45 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3440689",
abstract = "Artificial intelligence and machine learning are
experiencing widespread adoption in industry and
academia. This has been driven by rapid advances in the
applications and accuracy of AI through increasingly
complex algorithms and models; this, in turn, has
spurred research into specialized hardware AI
accelerators. Given the rapid pace of advances, it is
easy to forget that they are often developed and
evaluated in a vacuum without considering the full
application environment. This article emphasizes the
need for a holistic, end-to-end analysis of artificial
intelligence (AI) workloads and reveals the ``AI tax.''
We deploy and characterize Face Recognition in an edge
data center. The application is an AI-centric edge
video analytics application built using popular open
source infrastructure and machine learning (ML) tools.
Despite using state-of-the-art AI and ML algorithms,
the application relies heavily on pre- and
post-processing code. As AI-centric applications
benefit from the acceleration promised by accelerators,
we find they impose stresses on the hardware and
software infrastructure: storage and network bandwidth
become major bottlenecks with increasing AI
acceleration. By specializing for AI applications, we
show that a purpose-built edge data center can be
designed for the stresses of accelerated AI at 15\%
lower TCO than one derived from homogeneous servers and
infrastructure.",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Baskin:2021:UUN,
author = "Chaim Baskin and Natan Liss and Eli Schwartz and
Evgenii Zheltonozhskii and Raja Giryes and Alex M.
Bronstein and Avi Mendelson",
title = "{UNIQ}: Uniform Noise Injection for Non-Uniform
Quantization of Neural Networks",
journal = j-TOCS,
volume = "37",
number = "1--4",
pages = "4:1--4:15",
month = mar,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3444943",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue May 25 09:04:45 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3444943",
abstract = "We present a novel method for neural network
quantization. Our method, named UNIQ, emulates a
non-uniform $k$-quantile quantizer and adapts the model
to perform well with quantized weights by injecting
noise to the weights at training time. As a by-product
of injecting noise to weights, we find that activations
can also be quantized to as low as 8-bit with only a
minor accuracy degradation. Our non-uniform
quantization approach provides a novel alternative to
the existing uniform quantization techniques for neural
networks. We further propose a novel complexity metric
of number of bit operations performed (BOPs), and we
show that this metric has a linear relation with logic
utilization and power. We suggest evaluating the
trade-off of accuracy vs. complexity (BOPs). The
proposed method, when evaluated on ResNet18/34/50 and
MobileNet on ImageNet, outperforms the prior state of
the art both in the low-complexity regime and the high
accuracy regime. We demonstrate the practical
applicability of this approach, by implementing our
non-uniformly quantized CNN on FPGA.",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Zhuo:2021:DGP,
author = "Youwei Zhuo and Jingji Chen and Gengyu Rao and Qinyi
Luo and Yanzhi Wang and Hailong Yang and Depei Qian and
Xuehai Qian",
title = "Distributed Graph Processing System and
Processing-in-memory Architecture with Precise
Loop-carried Dependency Guarantee",
journal = j-TOCS,
volume = "37",
number = "1--4",
pages = "5:1--5:37",
month = jun,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3453681",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Fri Jul 2 08:25:18 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3453681",
abstract = "To hide the complexity of the underlying system, graph
processing frameworks ask programmers to specify graph
computations in user-defined functions (UDFs) of
graph-oriented programming model. Due to the nature of
distributed execution, current \ldots{}",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Agate:2021:SSE,
author = "Vincenzo Agate and Alessandra {De Paola} and Giuseppe
{Lo Re} and Marco Morana",
title = "A Simulation Software for the Evaluation of
Vulnerabilities in Reputation Management Systems",
journal = j-TOCS,
volume = "37",
number = "1--4",
pages = "6:1--6:30",
month = jun,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3458510",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Fri Jul 2 08:25:18 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3458510",
abstract = "Multi-agent distributed systems are characterized by
autonomous entities that interact with each other to
provide, and/or request, different kinds of services.
In several contexts, especially when a reward is
offered according to the quality of service, \ldots{}",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Ruaro:2021:MDM,
author = "Marcelo Ruaro and Anderson Sant'ana and Axel Jantsch
and Fernando Gehm Moraes",
title = "Modular and Distributed Management of Many-Core
{SoCs}",
journal = j-TOCS,
volume = "38",
number = "1--2",
pages = "1:1--1:16",
month = jul,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3458511",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Aug 10 13:25:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3458511",
abstract = "Many-Core Systems-on-Chip increasingly require Dynamic
Multi-objective Management (DMOM) of resources. DMOM
uses different management components for objectives and
resources to implement comprehensive and self-adaptive
system resource management. DMOMs \ldots{}",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Markussen:2021:SZO,
author = "Jonas Markussen and Lars Bj{\o}rlykke Kristiansen and
P{\aa}l Halvorsen and Halvor Kielland-Gyrud and
H{\aa}kon Kvale Stensland and Carsten Griwodz",
title = "{SmartIO}: Zero-overhead Device Sharing through {PCIe}
Networking",
journal = j-TOCS,
volume = "38",
number = "1--2",
pages = "2:1--2:78",
month = jul,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3462545",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Aug 10 13:25:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3462545",
abstract = "The large variety of compute-heavy and data-driven
applications accelerate the need for a distributed I/O
solution that enables cost-effective scaling of
resources between networked hosts. For example, in a
cluster system, different machines may have \ldots{}",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Katsikas:2021:MHP,
author = "Georgios P. Katsikas and Tom Barbette and Dejan
Kosti{\'c} and Gerald Q. {Maguire, Jr.} and Rebecca
Steinert",
title = "{Metron}: High-performance {NFV} Service Chaining Even
in the Presence of Blackboxes",
journal = j-TOCS,
volume = "38",
number = "1--2",
pages = "3:1--3:45",
month = jul,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3465628",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Aug 10 13:25:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3465628",
abstract = "Deployment of 100 Gigabit Ethernet (GbE) links
challenges the packet processing limits of commodity
hardware used for Network Functions Virtualization
(NFV). Moreover, realizing chained network functions
(i.e., service chains) necessitates the use of
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Zuo:2021:SIS,
author = "Zhiqiang Zuo and Kai Wang and Aftab Hussain and
Ardalan Amiri Sani and Yiyu Zhang and Shenming Lu and
Wensheng Dou and Linzhang Wang and Xuandong Li and
Chenxi Wang and Guoqing Harry Xu",
title = "Systemizing Interprocedural Static Analysis of
Large-scale Systems Code with {Graspan}",
journal = j-TOCS,
volume = "38",
number = "1--2",
pages = "4:1--4:39",
month = jul,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3466820",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Tue Aug 10 13:25:43 MDT 2021",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3466820",
abstract = "There is more than a decade-long history of using
static analysis to find bugs in systems such as Linux.
Most of the existing static analyses developed for
these systems are simple checkers that find bugs based
on pattern matching. Despite the presence \ldots{}",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Song:2021:ANF,
author = "Won Wook Song and Youngseok Yang and Jeongyoon Eo and
Jangho Seo and Joo Yeon Kim and Sanha Lee and Gyewon
Lee and Taegeon Um and Haeyoon Cho and Byung-Gon Chun",
title = "{Apache Nemo}: a Framework for Optimizing Distributed
Data Processing",
journal = j-TOCS,
volume = "38",
number = "3--4",
pages = "5:1--5:31",
month = nov,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3468144",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Apr 18 11:45:45 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3468144",
abstract = "Optimizing scheduling and communication of distributed
data processing for resource and data characteristics
is crucial for achieving high performance. Existing
approaches to such optimizations largely fall into two
categories. First, distributed runtimes \ldots{}",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Canakci:2021:SMB,
author = "Burcu Canakci and Robbert {Van Renesse}",
title = "Scaling Membership of {Byzantine} Consensus",
journal = j-TOCS,
volume = "38",
number = "3--4",
pages = "6:1--6:31",
month = nov,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3473138",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Apr 18 11:45:45 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3473138",
abstract = "Scaling Byzantine Fault Tolerant (BFT) systems in
terms of membership is important for secure
applications with large participation such as
blockchains. While traditional protocols have low
latency, they cannot handle many processors.
Conversely, \ldots{}",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Kumar:2021:SSF,
author = "Rakesh Kumar and Boris Grot",
title = "Shooting Down the Server Front-End Bottleneck",
journal = j-TOCS,
volume = "38",
number = "3--4",
pages = "7:1--7:30",
month = nov,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3484492",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Mon Apr 18 11:45:45 MDT 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3484492",
abstract = "The front-end bottleneck is a well-established problem
in server workloads owing to their deep software stacks
and large instruction footprints. Despite years of
research into effective L1-I and BTB prefetching,
state-of-the-art techniques force a trade-off \ldots{}",
acknowledgement = ack-nhfb,
articleno = "7",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Lyerly:2021:ORT,
author = "Robert Lyerly and Carlos Bilbao and Changwoo Min and
Christopher J. Rossbach and Binoy Ravindran",
title = "An {OpenMP} Runtime for Transparent Work Sharing
across Cache-Incoherent Heterogeneous Nodes",
journal = j-TOCS,
volume = "39",
number = "1--4",
pages = "1:1--1:??",
month = nov,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3505224",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Dec 8 06:35:07 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/pvm.bib;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3505224",
abstract = "In this work, we present libHetMP, an OpenMP runtime
for automatically and transparently distributing
parallel computation across heterogeneous \ldots{}",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Chen:2021:UHM,
author = "Lei Chen and Jiacheng Zhao and Chenxi Wang and Ting
Cao and John Zigman and Haris Volos and Onur Mutlu and
Fang Lv and Xiaobing Feng and Guoqing Harry Xu and
Huimin Cui",
title = "Unified Holistic Memory Management Supporting Multiple
Big Data Processing Frameworks over Hybrid Memories",
journal = j-TOCS,
volume = "39",
number = "1--4",
pages = "2:1--2:??",
month = nov,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3511211",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Dec 8 06:35:07 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3511211",
abstract = "To process real-world datasets, modern data-parallel
systems often require extremely large amounts of
memory, which are both costly and energy \ldots{}",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Boroujerdian:2021:RCA,
author = "Behzad Boroujerdian and Hasan Genc and Srivatsan
Krishnan and Bardienus Pieter Duisterhof and Brian
Plancher and Kayvan Mansoorshahi and Marcelino Almeida
and Wenzhi Cui and Aleksandra Faust and Vijay Janapa
Reddi",
title = "The Role of Compute in Autonomous Micro Aerial
Vehicles: Optimizing for Mission Time and Energy
Efficiency",
journal = j-TOCS,
volume = "39",
number = "1--4",
pages = "3:1--3:??",
month = nov,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3511210",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Dec 8 06:35:07 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3511210",
abstract = "Autonomous and mobile cyber-physical machines are
becoming an inevitable part of our future. In
particular, Micro Aerial Vehicles (MAVs) have seen a
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "3",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Blocher:2021:RAO,
author = "Marcel Bl{\"o}cher and Emilio Coppa and Pascal Kleber
and Patrick Eugster and William Culhane and Masoud
Saeida Ardekani",
title = "{ROME}: All Overlays Lead to Aggregation, but Some Are
Faster than Others",
journal = j-TOCS,
volume = "39",
number = "1--4",
pages = "4:1--4:??",
month = nov,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3516430",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Dec 8 06:35:07 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3516430",
abstract = "Aggregation is common in data analytics and crucial to
distilling information from large datasets, but current
data analytics frameworks do not fully exploit
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "4",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Xing:2021:HCE,
author = "Tong Xing and Antonio Barbalace and Pierre Olivier and
Mohamed L. Karaoui and Wei Wang and Binoy Ravindran",
title = "{H-Container}: Enabling Heterogeneous-{ISA} Container
Migration in Edge Computing",
journal = j-TOCS,
volume = "39",
number = "1--4",
pages = "5:1--5:??",
month = nov,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3524452",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Dec 8 06:35:07 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3524452",
abstract = "Edge computing is a recent computing paradigm that
brings cloud services closer to the client. Among other
features, edge computing offers extremely low
\ldots{}",
acknowledgement = ack-nhfb,
articleno = "5",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Xia:2021:BIP,
author = "Yubin Xia and Dong Du and Zhichao Hua and Binyu Zang
and Haibo Chen and Haibing Guan",
title = "Boosting Inter-process Communication with
Architectural Support",
journal = j-TOCS,
volume = "39",
number = "1--4",
pages = "6:1--6:??",
month = nov,
year = "2021",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3532861",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Dec 8 06:35:07 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3532861",
abstract = "IPC (inter-process communication) is a critical
mechanism for modern OSes, including not only
microkernels such as seL4, QNX, and Fuchsia where
system \ldots{}",
acknowledgement = ack-nhfb,
articleno = "6",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Diavastos:2022:EIS,
author = "Andreas Diavastos and Trevor E. Carlson",
title = "Efficient Instruction Scheduling Using Real-time Load
Delay Tracking",
journal = j-TOCS,
volume = "40",
number = "1--4",
pages = "1:1--1:??",
month = nov,
year = "2022",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3548681",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Dec 8 06:35:07 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3548681",
abstract = "Issue time prediction processors use dataflow
dependencies and predefined instruction latencies to
predict issue times of repeated instructions. In this
work, \ldots{}",
acknowledgement = ack-nhfb,
articleno = "1",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Biswas:2022:UPR,
author = "Arnab Kumar Biswas",
title = "Using Pattern of On-Off Routers and Links and Router
Delays to Protect Network-on-Chip Intellectual
Property",
journal = j-TOCS,
volume = "40",
number = "1--4",
pages = "2:1--2:??",
month = nov,
year = "2022",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3548680",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Thu Dec 8 06:35:07 MST 2022",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3548680",
abstract = "Intellectual Property (IP) reuse is a well known
practice in chip design processes. Nowadays,
network-on-chips (NoCs) are increasingly used as IP and
sold by \ldots{}",
acknowledgement = ack-nhfb,
articleno = "2",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Alkhatib:2023:PNP,
author = "Basil Alkhatib and Sreeharsha Udayashankar and Sara
Qunaibi and Ahmed Alquraan and Mohammed Alfatafta and
Wael Al-Manasrah and Alex Depoutovitch and Samer
Al-Kiswany",
title = "Partial Network Partitioning",
journal = j-TOCS,
volume = "41",
number = "1--4",
pages = "1:1--1:??",
month = nov,
year = "2023",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3576192",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Feb 3 11:39:05 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3576192",
abstract = "We present an extensive study focused on partial
network partitioning. Partial network partitions
disrupt the communication between some but not all
nodes in \ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Comput. Syst.",
articleno = "1",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Sheff:2023:CRB,
author = "Isaac Sheff and Xinwen Wang and Kushal Babel and
Haobin Ni and Robbert van Renesse and Andrew C. Myers",
title = "{Charlotte}: Reformulating Blockchains into a {Web} of
Composable Attested Data Structures for Cross-Domain
Applications",
journal = j-TOCS,
volume = "41",
number = "1--4",
pages = "2:1--2:??",
month = nov,
year = "2023",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3607534",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Feb 3 11:39:05 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/bitcoin.bib;
https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3607534",
abstract = "Cross-domain applications are rapidly adopting
blockchain techniques for immutability, availability,
integrity, and interoperability. However, for most
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Comput. Syst.",
articleno = "2",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Park:2023:FFM,
author = "Jonggyu Park and Young Ik Eom",
title = "Filesystem Fragmentation on Modern Storage Systems",
journal = j-TOCS,
volume = "41",
number = "1--4",
pages = "3:1--3:??",
month = nov,
year = "2023",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3611386",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Feb 3 11:39:05 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3611386",
abstract = "Filesystem fragmentation has been one of the primary
reasons for computer systems to get slower over time.
However, there have been rapid changes in modern
storage systems over the past decades, and modern
storage devices such as solid state drives have
different mechanisms to access data, compared with
traditional rotational ones. In this article, we
revisit filesystem fragmentation on modern computer
systems from both performance and fairness
perspectives. According to our extensive experiments,
filesystem fragmentation not only degrades I/O
performance of modern storage devices, but also incurs
various problems related to I/O fairness, such as
performance interference. Unfortunately, conventional
defragmentation tools are designed primarily for hard
disk drives and thus generate an unnecessarily large
amount of I/Os for data migration. To mitigate such
problems, this article presents FragPicker, a new
defragmentation tool for modern storage devices.
FragPicker analyzes the I/O behaviors of each target
application and defragments only necessary pieces of
data whose migration can contribute to performance
improvement, thereby effectively minimizing the I/O
amount for defragmentation. Our evaluation with YCSB
workload-C shows FragPicker reduces the total amount of
I/O for defragmentation by around 66\% and the elapsed
time by around 84\%, while showing a similar level of
defragmentation effect.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Comput. Syst.",
articleno = "3",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Pellauer:2023:SOS,
author = "Michael Pellauer and Jason Clemons and Vignesh Balaji
and Neal Crago and Aamer Jaleel and Donghyuk Lee and
Mike O'Connor and Angshuman Parashar and Sean Treichler
and Po-An Tsai and Stephen W. Keckler and Joel S.
Emer",
title = "{Symphony}: Orchestrating Sparse and Dense Tensors
with Hierarchical Heterogeneous Processing",
journal = j-TOCS,
volume = "41",
number = "1--4",
pages = "4:1--4:??",
month = nov,
year = "2023",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3630007",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Feb 3 11:39:05 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3630007",
abstract = "Sparse tensor algorithms are becoming widespread,
particularly in the domains of deep learning, graph and
data analytics, and scientific computing. Current
\ldots{}",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Comput. Syst.",
articleno = "4",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
@Article{Zhao:2023:MIB,
author = "Jie Zhao and Jinchen Xu and Peng Di and Wang Nie and
Jiahui Hu and Yanzhi Yi and Sijia Yang and Zhen Geng
and Renwei Zhang and Bojie Li and Zhiliang Gan and
Xuefeng Jin",
title = "Modeling the Interplay between Loop Tiling and Fusion
in Optimizing Compilers Using Affine Relations",
journal = j-TOCS,
volume = "41",
number = "1--4",
pages = "5:1--5:??",
month = nov,
year = "2023",
CODEN = "ACSYEC",
DOI = "https://doi.org/10.1145/3635305",
ISSN = "0734-2071 (print), 1557-7333 (electronic)",
ISSN-L = "0734-2071",
bibdate = "Sat Feb 3 11:39:05 MST 2024",
bibsource = "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
URL = "https://dl.acm.org/doi/10.1145/3635305",
abstract = "Loop tiling and fusion are two essential
transformations in optimizing compilers to enhance the
data locality of programs. Existing heuristics either
perform loop tiling and fusion in a particular order,
missing some of their profitable compositions, or
execute ad-hoc implementations for domain-specific
applications, calling for a generalized and systematic
solution in optimizing compilers.\par
In this article, we present a so-called {\em basteln\/}
(an abbreviation for backward slicing of tiled loop
nests) strategy in polyhedral compilation to better
model the interplay between loop tiling and fusion. The
basteln strategy first groups loop nests by preserving
their parallelism\slash tilability and next performs
rectangular\slash parallelogram tiling to the output
groups that produce data consumed outside the
considered program fragment. The memory footprints
required by each tile are then computed, from which the
upward exposed data are extracted to determine the tile
shapes of the remaining fusion groups. Such a tiling
mechanism can construct complex tile shapes imposed by
the dependences between these groups, which are further
merged by a post-tiling fusion algorithm for enhancing
data locality without losing the parallelism\slash
tilability of the output groups. The basteln strategy
also takes into account the amount of redundant
computations and the fusion of independent groups,
exhibiting a general applicability.\par
We integrate the basteln strategy into two optimizing
compilers, with one a general-purpose optimizer and the
other a domain-specific compiler for deploying deep
learning models. The experiments are conducted on CPU,
GPU, and a deep learning accelerator to demonstrate the
effectiveness of the approach for a wide class of
application domains, including deep learning, image
processing, sparse matrix computation, and linear
algebra. In particular, the basteln strategy achieves a
mean speedup of $ 1.8 \times $ over cuBLAS\slash cuDNN
and $ 1.1 \times $ over TVM on GPU when used to
optimize deep learning models; it also outperforms PPCG
and TVM by 11\% and 20\%, respectively, when generating
code for the deep learning accelerator.",
acknowledgement = ack-nhfb,
ajournal = "ACM Trans. Comput. Syst.",
articleno = "5",
fjournal = "ACM Transactions on Computer Systems",
journal-URL = "https://dl.acm.org/loi/tocs",
}
%%% TOCS 42(1--2), article 1 (Luo et al.).
%%% NOTE(review): the abstract value ends in \ldots{}, i.e., it is
%%% truncated in this record; TODO restore the full text (presumably
%%% obtainable from the page named in the URL field).
@Article{Luo:2024:ORM,
  author =       "Shutian Luo and Chenyu Lin and Kejiang Ye and Guoyao
                 Xu and Liping Zhang and Guodong Yang and Huanle Xu and
                 Chengzhong Xu",
  title =        "Optimizing Resource Management for Shared
                 Microservices: a Scalable System Design",
  journal =      j-TOCS,
  volume =       "42",
  number =       "1--2",
  pages =        "1:1--1:??",
  month =        may,
  year =         "2024",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3631607",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu May 16 10:49:47 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3631607",
  abstract =     "A common approach to improving resource utilization in
                 data centers is to adaptively provision resources based
                 on the actual workload. One fundamental challenge of
                 doing this in microservice management frameworks,
                 however, is that different components of \ldots{}",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Comput. Syst.",
  articleno =    "1",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "https://dl.acm.org/loi/tocs",
}
%%% TOCS 42(1--2), article 2 (Zhao et al.).
%%% NOTE(review): the abstract value is truncated in this record: it
%%% stops at \ldots{} inside an open quotation, which is then closed
%%% by hand; TODO restore the full text (presumably obtainable from
%%% the page named in the URL field).
@Article{Zhao:2024:CDC,
  author =       "Laiping Zhao and Yushuai Cui and Yanan Yang and Xiaobo
                 Zhou and Tie Qiu and Keqiu Li and Yungang Bao",
  title =        "Component-distinguishable Co-location and Resource
                 Reclamation for High-throughput Computing",
  journal =      j-TOCS,
  volume =       "42",
  number =       "1--2",
  pages =        "2:1--2:??",
  month =        may,
  year =         "2024",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3630006",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu May 16 10:49:47 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3630006",
  abstract =     "Cloud service providers improve resource utilization
                 by co-locating latency-critical (LC) workloads with
                 best-effort batch (BE) jobs in datacenters. However,
                 they usually treat multi-component LCs as monolithic
                 applications and treat BEs as ``second-class
                 \ldots{}''",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Comput. Syst.",
  articleno =    "2",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "https://dl.acm.org/loi/tocs",
}
%%% TOCS 42(1--2), article 3 (Kappes and Anastasiadis).
%%% Word pairs joined by a solidus are written with \slash, as in other
%%% abstracts in this file, so that TeX may break the line after the
%%% slash; the abbreviation I/O is deliberately left unbreakable.
@Article{Kappes:2024:DFU,
  author =       "Giorgos Kappes and Stergios V. Anastasiadis",
  title =        "{Diciclo}: Flexible User-level Services for Efficient
                 Multitenant Isolation",
  journal =      j-TOCS,
  volume =       "42",
  number =       "1--2",
  pages =        "3:1--3:??",
  month =        may,
  year =         "2024",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3639404",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu May 16 10:49:47 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3639404",
  abstract =     "Containers are a mainstream virtualization technique
                 for running stateful workloads over persistent storage.
                 In highly utilized multitenant hosts, resource
                 contention at the system kernel leads to inefficient
                 container input\slash output (I/O) handling. Although there
                 are interesting techniques to address this issue, they
                 incur high implementation complexity and execution
                 overhead. As a cost-effective alternative, we introduce
                 the Diciclo architecture with our assumptions, goals,
                 and principles. For each tenant, Diciclo isolates the
                 control and data I/O path at user level and runs
                 dedicated storage systems. Diciclo includes the
                 libservice unified user-level abstraction of system
                 services and the node structure design pattern for the
                 application and server side. We prototyped a toolkit of
                 user-level components that comprise the library to
                 invoke the standard I/O calls, the I/O communication
                 mechanism, and the I/O services. Based on Diciclo, we
                 built Danaus, a filesystem client that integrates a
                 union filesystem with a Ceph distributed filesystem
                 client and configurable shared cache. Across different
                 host configurations, workloads, and systems, Danaus
                 achieves improved performance stability, because it
                 handles I/O with reserved per-tenant resources and
                 avoids intensive kernel locking. Based on having built
                 and evaluated Danaus, we share valuable lessons about
                 resource contention, file management, service
                 separation, and performance stability in multitenant
                 systems.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Comput. Syst.",
  articleno =    "3",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "https://dl.acm.org/loi/tocs",
}
%%% TOCS 42(1--2), article 4 (Sha et al.).
%%% Word pairs joined by a solidus (hot/cold, splits/merges,
%%% hardware/software) are written with \slash, as in other abstracts
%%% in this file, so that TeX may break the line after the slash.
@Article{Sha:2024:HSC,
  author =       "Sai Sha and Chuandong Li and Xiaolin Wang and Zhenlin
                 Wang and Yingwei Luo",
  title =        "Hardware--Software Collaborative Tiered-Memory
                 Management Framework for Virtualization",
  journal =      j-TOCS,
  volume =       "42",
  number =       "1--2",
  pages =        "4:1--4:??",
  month =        may,
  year =         "2024",
  CODEN =        "ACSYEC",
  DOI =          "https://doi.org/10.1145/3639564",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu May 16 10:49:47 MDT 2024",
  bibsource =    "https://www.math.utah.edu/pub/tex/bib/tocs.bib;
                 https://www.math.utah.edu/pub/tex/bib/virtual-machines.bib",
  URL =          "https://dl.acm.org/doi/10.1145/3639564",
  abstract =     "The tiered-memory system can effectively expand the
                 memory capacity for virtual machines (VMs). However,
                 virtualization introduces new challenges specifically
                 in enforcing performance isolation, minimizing context
                 switching, and providing resource overcommit. None of
                 the state-of-the-art designs consider virtualization
                 and address these challenges; we observe that a VM with
                 tiered memory incurs up to a $ 2 \times $ slowdown
                 compared to a DRAM-only VM.\par
                 We propose vTMM, a hardware-software collaborative
                 tiered-memory management framework for virtualization.
                 A key insight in vTMM is to leverage the unique system
                 features in virtualization to meet the above
                 challenges. vTMM automatically determines page hotness
                 and migrates pages between fast and slow memory to
                 achieve better performance. Specially, vTMM optimizes
                 page tracking and migration based on page-modification
                 logging (PML), a hardware-assisted virtualization
                 mechanism, and adaptively distinguishes hot\slash cold pages
                 through the page ``temperature'' sorting. vTMM also
                 dynamically adjusts fast memory among multi-VMs on
                 demand by using a memory pool. Further, vTMM tracks
                 huge pages at regular-page granularity in hardware and
                 splits\slash merges pages in software, realizing
                 hybrid-grained page management and optimization. We
                 implement and evaluate vTMM with single-grained page
                 management on an Intel processor, and the
                 hybrid-grained page management on a Sunway processor
                 with hardware mode supporting hardware\slash software
                 co-designs. Experiments show that vTMM outperforms
                 existing tiered-memory management designs in
                 virtualization.",
  acknowledgement = ack-nhfb,
  ajournal =     "ACM Trans. Comput. Syst.",
  articleno =    "4",
  fjournal =     "ACM Transactions on Computer Systems",
  journal-URL =  "https://dl.acm.org/loi/tocs",
}
%%% Whole-issue record: per the abstract, this journal issue carried
%%% six symposium papers.  That is presumably why the journal macro
%%% appears in the series field and the issue number is folded into
%%% volume as 6(1).  The page range is unknown (??--??); pagecount
%%% records 154.
@Proceedings{ACM:1988:ASS,
  editor =       "ACM",
  booktitle =    "{1988 ACM\slash SIGOPS Symposium on Operating Systems
                 Principles}",
  title =        "{1988 ACM\slash SIGOPS Symposium on Operating Systems
                 Principles}",
  volume =       "6(1)",
  publisher =    pub-ACM,
  address =      pub-ACM:adr,
  pages =        "??--??",
  month =        feb,
  year =         "1988",
  CODEN =        "ACSYEC",
  ISSN =         "0734-2071 (print), 1557-7333 (electronic)",
  ISSN-L =       "0734-2071",
  bibdate =      "Thu Jan 14 06:47:30 MST 1999",
  bibsource =    "Compendex database;
                 https://www.math.utah.edu/pub/tex/bib/tocs.bib",
  series =       j-TOCS,
  abstract =     "This issue contains 6 conference papers. The topics
                 covered are: stored-voice management in the Etherphone
                 system; 801 storage; scale and performance of a
                 distributed file system; recovery performance in
                 QuickSilver; fine-grained mobility in the Emerald
                 system; caching in the Sprite network file system.",
  acknowledgement = ack-nhfb,
  classification = "723",
  conference =   "1988 ACM\slash SIGOPS Symposium on Operating Systems
                 Principles.",
  keywords =     "801 storage; computer architecture; computer networks;
                 computer operating systems; computer systems, digital
                 --- Distributed; Emerald system; Etherphone system;
                 QuickSilver; Sprite network file system",
  pagecount =    "154",
  sponsor =      "ACM, Special Interest Group on Operating Systems, New
                 York, NY, USA",
}